From 2146f13408cdb85c738364fe8f7965209e08c6be Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 16 Jun 2014 15:55:05 -0400 Subject: [PATCH] Avoid recursion when processing simple lists of AND'ed or OR'ed clauses. Since most of the system thinks AND and OR are N-argument expressions anyway, let's have the grammar generate a representation of that form when dealing with input like "x AND y AND z AND ...", rather than generating a deeply-nested binary tree that just has to be flattened later by the planner. This avoids stack overflow in parse analysis when dealing with queries having more than a few thousand such clauses; and in any case it removes some rather unsightly inconsistencies, since some parts of parse analysis were generating N-argument ANDs/ORs already. It's still possible to get a stack overflow with weirdly parenthesized input, such as "x AND (y AND (z AND ( ... )))", but such cases are not mainstream usage. The maximum depth of parenthesization is already limited by Bison's stack in such cases, anyway, so that the limit is probably fairly platform-independent. Patch originally by Gurjeet Singh, heavily revised by me --- contrib/postgres_fdw/deparse.c | 3 - src/backend/nodes/nodeFuncs.c | 8 ++ src/backend/nodes/outfuncs.c | 9 -- src/backend/optimizer/prep/prepjointree.c | 6 +- src/backend/optimizer/prep/prepqual.c | 13 ++- src/backend/optimizer/util/clauses.c | 15 +-- src/backend/parser/gram.y | 110 +++++++++++++++------- src/backend/parser/parse_clause.c | 21 ++--- src/backend/parser/parse_expr.c | 98 ++++++++----------- src/include/nodes/parsenodes.h | 3 - src/include/nodes/primnodes.h | 8 +- src/test/regress/expected/rules.out | 2 +- 12 files changed, 155 insertions(+), 141 deletions(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index d7d9b9c77d..322138dd0c 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -1716,9 +1716,6 @@ deparseRelabelType(RelabelType *node, deparse_expr_cxt *context) /* * Deparse a BoolExpr node. - * - * Note: by the time we get here, AND and OR expressions have been flattened - * into N-argument form, so we'd better be prepared to deal with that. */ static void deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context) diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 5a98bfbc11..f4999c5be0 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -3047,6 +3047,14 @@ raw_expression_tree_walker(Node *node, /* operator name is deemed uninteresting */ } break; + case T_BoolExpr: + { + BoolExpr *expr = (BoolExpr *) node; + + if (walker(expr->args, context)) + return true; + } + break; case T_ColumnRef: /* we assume the fields contain nothing interesting */ break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 11c7486007..deff33f6f7 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2437,15 +2437,6 @@ _outAExpr(StringInfo str, const A_Expr *node) appendStringInfoChar(str, ' '); WRITE_NODE_FIELD(name); break; - case AEXPR_AND: - appendStringInfoString(str, " AND"); - break; - case AEXPR_OR: - appendStringInfoString(str, " OR"); - break; - case AEXPR_NOT: - appendStringInfoString(str, " NOT"); - break; case AEXPR_OP_ANY: appendStringInfoChar(str, ' '); WRITE_NODE_FIELD(name); diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 776fe426c3..79521942a4 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -133,9 +133,9 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int relid); * transformations if any are found. * * This routine has to run before preprocess_expression(), so the quals - * clauses are not yet reduced to implicit-AND format. That means we need - * to recursively search through explicit AND clauses, which are - * probably only binary ANDs. We stop as soon as we hit a non-AND item. + * clauses are not yet reduced to implicit-AND format, and are not guaranteed + * to be AND/OR-flat either. That means we need to recursively search through + * explicit AND clauses. We stop as soon as we hit a non-AND item. */ void pull_up_sublinks(PlannerInfo *root) diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c index 2a24938d84..244e5dbc15 100644 --- a/src/backend/optimizer/prep/prepqual.c +++ b/src/backend/optimizer/prep/prepqual.c @@ -4,13 +4,12 @@ * Routines for preprocessing qualification expressions * * - * The parser regards AND and OR as purely binary operators, so a qual like - * (A = 1) OR (A = 2) OR (A = 3) ... - * will produce a nested parsetree - * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...))) - * In reality, the optimizer and executor regard AND and OR as N-argument - * operators, so this tree can be flattened to - * (OR (A = 1) (A = 2) (A = 3) ...) + * While the parser will produce flattened (N-argument) AND/OR trees from + * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause + * directly underneath another AND, or OR underneath OR, if the input was + * oddly parenthesized. Also, rule expansion and subquery flattening could + * produce such parsetrees. The planner wants to flatten all such cases + * to ensure consistent optimization behavior. * * Formerly, this module was responsible for doing the initial flattening, * but now we leave it to eval_const_expressions to do that since it has to diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 97dacaaac1..19b5cf7b61 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -3447,12 +3447,15 @@ simplify_or_arguments(List *args, List *unprocessed_args; /* - * Since the parser considers OR to be a binary operator, long OR lists - * become deeply nested expressions. We must flatten these into long - * argument lists of a single OR operator. To avoid blowing out the stack - * with recursion of eval_const_expressions, we resort to some tenseness - * here: we keep a list of not-yet-processed inputs, and handle flattening - * of nested ORs by prepending to the to-do list instead of recursing. + * We want to ensure that any OR immediately beneath another OR gets + * flattened into a single OR-list, so as to simplify later reasoning. + * + * To avoid stack overflow from recursion of eval_const_expressions, we + * resort to some tenseness here: we keep a list of not-yet-processed + * inputs, and handle flattening of nested ORs by prepending to the to-do + * list instead of recursing. Now that the parser generates N-argument + * ORs from simple lists, this complexity is probably less necessary than + * it once was, but we might as well keep the logic. */ unprocessed_args = list_copy(args); while (unprocessed_args) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 7b9895d61e..dd04b1a88a 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -151,6 +151,9 @@ static void insertSelectOptions(SelectStmt *stmt, static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg); static Node *doNegate(Node *n, int location); static void doNegateFloat(Value *v); +static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location); +static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location); +static Node *makeNotExpr(Node *expr, int location); static Node *makeAArrayExpr(List *elements, int location); static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args, int location); @@ -10849,11 +10852,11 @@ a_expr: c_expr { $$ = $1; } { $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, NULL, @2); } | a_expr AND a_expr - { $$ = (Node *) makeA_Expr(AEXPR_AND, NIL, $1, $3, @2); } + { $$ = makeAndExpr($1, $3, @2); } | a_expr OR a_expr - { $$ = (Node *) makeA_Expr(AEXPR_OR, NIL, $1, $3, @2); } + { $$ = makeOrExpr($1, $3, @2); } | NOT a_expr - { $$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL, $2, @1); } + { $$ = makeNotExpr($2, @1); } | a_expr LIKE a_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~~", $1, $3, @2); } @@ -11022,11 +11025,9 @@ a_expr: c_expr { $$ = $1; } } | a_expr IS NOT DISTINCT FROM a_expr %prec IS { - $$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL, - (Node *) makeSimpleA_Expr(AEXPR_DISTINCT, - "=", $1, $6, @2), - @2); - + $$ = makeNotExpr((Node *) makeSimpleA_Expr(AEXPR_DISTINCT, + "=", $1, $6, @2), + @2); } | a_expr IS OF '(' type_list ')' %prec IS { @@ -11044,43 +11045,43 @@ a_expr: c_expr { $$ = $1; } */ | a_expr BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN { - $$ = (Node *) makeA_Expr(AEXPR_AND, NIL, + $$ = makeAndExpr( (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $4, @2), (Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $6, @2), - @2); + @2); } | a_expr NOT BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN { - $$ = (Node *) makeA_Expr(AEXPR_OR, NIL, + $$ = makeOrExpr( (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $5, @2), (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $7, @2), - @2); + @2); } | a_expr BETWEEN SYMMETRIC b_expr AND b_expr %prec BETWEEN { - $$ = (Node *) makeA_Expr(AEXPR_OR, NIL, - (Node *) makeA_Expr(AEXPR_AND, NIL, + $$ = makeOrExpr( + makeAndExpr( (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $4, @2), (Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $6, @2), - @2), - (Node *) makeA_Expr(AEXPR_AND, NIL, + @2), + makeAndExpr( (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $6, @2), (Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $4, @2), - @2), - @2); + @2), + @2); } | a_expr NOT BETWEEN SYMMETRIC b_expr AND b_expr %prec BETWEEN { - $$ = (Node *) makeA_Expr(AEXPR_AND, NIL, - (Node *) makeA_Expr(AEXPR_OR, NIL, + $$ = makeAndExpr( + makeOrExpr( (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $5, @2), (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $7, @2), - @2), - (Node *) makeA_Expr(AEXPR_OR, NIL, + @2), + makeOrExpr( (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $7, @2), (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $5, @2), - @2), - @2); + @2), + @2); } | a_expr IN_P in_expr { @@ -11114,7 +11115,7 @@ a_expr: c_expr { $$ = $1; } n->operName = list_make1(makeString("=")); n->location = @3; /* Stick a NOT on top */ - $$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL, (Node *) n, @2); + $$ = makeNotExpr((Node *) n, @2); } else { @@ -11162,10 +11163,9 @@ a_expr: c_expr { $$ = $1; } } | a_expr IS NOT DOCUMENT_P %prec IS { - $$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL, - makeXmlExpr(IS_DOCUMENT, NULL, NIL, - list_make1($1), @2), - @2); + $$ = makeNotExpr(makeXmlExpr(IS_DOCUMENT, NULL, NIL, + list_make1($1), @2), + @2); } ; @@ -11216,8 +11216,9 @@ b_expr: c_expr } | b_expr IS NOT DISTINCT FROM b_expr %prec IS { - $$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, - NULL, (Node *) makeSimpleA_Expr(AEXPR_DISTINCT, "=", $1, $6, @2), @2); + $$ = makeNotExpr((Node *) makeSimpleA_Expr(AEXPR_DISTINCT, + "=", $1, $6, @2), + @2); } | b_expr IS OF '(' type_list ')' %prec IS { @@ -11234,10 +11235,9 @@ b_expr: c_expr } | b_expr IS NOT DOCUMENT_P %prec IS { - $$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL, - makeXmlExpr(IS_DOCUMENT, NULL, NIL, - list_make1($1), @2), - @2); + $$ = makeNotExpr(makeXmlExpr(IS_DOCUMENT, NULL, NIL, + list_make1($1), @2), + @2); } ; @@ -13692,6 +13692,46 @@ doNegateFloat(Value *v) v->val.str = psprintf("-%s", oldval); } +static Node * +makeAndExpr(Node *lexpr, Node *rexpr, int location) +{ + /* Flatten "a AND b AND c ..." to a single BoolExpr on sight */ + if (IsA(lexpr, BoolExpr)) + { + BoolExpr *blexpr = (BoolExpr *) lexpr; + + if (blexpr->boolop == AND_EXPR) + { + blexpr->args = lappend(blexpr->args, rexpr); + return (Node *) blexpr; + } + } + return (Node *) makeBoolExpr(AND_EXPR, list_make2(lexpr, rexpr), location); +} + +static Node * +makeOrExpr(Node *lexpr, Node *rexpr, int location) +{ + /* Flatten "a OR b OR c ..." to a single BoolExpr on sight */ + if (IsA(lexpr, BoolExpr)) + { + BoolExpr *blexpr = (BoolExpr *) lexpr; + + if (blexpr->boolop == OR_EXPR) + { + blexpr->args = lappend(blexpr->args, rexpr); + return (Node *) blexpr; + } + } + return (Node *) makeBoolExpr(OR_EXPR, list_make2(lexpr, rexpr), location); +} + +static Node * +makeNotExpr(Node *expr, int location) +{ + return (Node *) makeBoolExpr(NOT_EXPR, list_make1(expr), location); +} + static Node * makeAArrayExpr(List *elements, int location) { diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index fcee1379c0..4931dcad3b 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -332,7 +332,8 @@ transformJoinUsingClause(ParseState *pstate, RangeTblEntry *leftRTE, RangeTblEntry *rightRTE, List *leftVars, List *rightVars) { - Node *result = NULL; + Node *result; + List *andargs = NIL; ListCell *lvars, *rvars; @@ -358,18 +359,16 @@ transformJoinUsingClause(ParseState *pstate, copyObject(lvar), copyObject(rvar), -1); - /* And combine into an AND clause, if multiple join columns */ - if (result == NULL) - result = (Node *) e; - else - { - A_Expr *a; - - a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e, -1); - result = (Node *) a; - } + /* Prepare to combine into an AND clause, if multiple join columns */ + andargs = lappend(andargs, e); } + /* Only need an AND if there's more than one join column */ + if (list_length(andargs) == 1) + result = (Node *) linitial(andargs); + else + result = (Node *) makeBoolExpr(AND_EXPR, andargs, -1); + /* * Since the references are already Vars, and are certainly from the input * relations, we don't have to go through the same pushups that diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 088224573f..83e20db276 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -41,15 +41,13 @@ bool Transform_null_equals = false; static Node *transformExprRecurse(ParseState *pstate, Node *expr); static Node *transformParamRef(ParseState *pstate, ParamRef *pref); static Node *transformAExprOp(ParseState *pstate, A_Expr *a); -static Node *transformAExprAnd(ParseState *pstate, A_Expr *a); -static Node *transformAExprOr(ParseState *pstate, A_Expr *a); -static Node *transformAExprNot(ParseState *pstate, A_Expr *a); static Node *transformAExprOpAny(ParseState *pstate, A_Expr *a); static Node *transformAExprOpAll(ParseState *pstate, A_Expr *a); static Node *transformAExprDistinct(ParseState *pstate, A_Expr *a); static Node *transformAExprNullIf(ParseState *pstate, A_Expr *a); static Node *transformAExprOf(ParseState *pstate, A_Expr *a); static Node *transformAExprIn(ParseState *pstate, A_Expr *a); +static Node *transformBoolExpr(ParseState *pstate, BoolExpr *a); static Node *transformFuncCall(ParseState *pstate, FuncCall *fn); static Node *transformCaseExpr(ParseState *pstate, CaseExpr *c); static Node *transformSubLink(ParseState *pstate, SubLink *sublink); @@ -223,15 +221,6 @@ transformExprRecurse(ParseState *pstate, Node *expr) case AEXPR_OP: result = transformAExprOp(pstate, a); break; - case AEXPR_AND: - result = transformAExprAnd(pstate, a); - break; - case AEXPR_OR: - result = transformAExprOr(pstate, a); - break; - case AEXPR_NOT: - result = transformAExprNot(pstate, a); - break; case AEXPR_OP_ANY: result = transformAExprOpAny(pstate, a); break; @@ -258,6 +247,10 @@ transformExprRecurse(ParseState *pstate, Node *expr) break; } + case T_BoolExpr: + result = transformBoolExpr(pstate, (BoolExpr *) expr); + break; + case T_FuncCall: result = transformFuncCall(pstate, (FuncCall *) expr); break; @@ -337,7 +330,6 @@ transformExprRecurse(ParseState *pstate, Node *expr) case T_DistinctExpr: case T_NullIfExpr: case T_ScalarArrayOpExpr: - case T_BoolExpr: case T_FieldSelect: case T_FieldStore: case T_RelabelType: @@ -918,46 +910,6 @@ transformAExprOp(ParseState *pstate, A_Expr *a) return result; } -static Node * -transformAExprAnd(ParseState *pstate, A_Expr *a) -{ - Node *lexpr = transformExprRecurse(pstate, a->lexpr); - Node *rexpr = transformExprRecurse(pstate, a->rexpr); - - lexpr = coerce_to_boolean(pstate, lexpr, "AND"); - rexpr = coerce_to_boolean(pstate, rexpr, "AND"); - - return (Node *) makeBoolExpr(AND_EXPR, - list_make2(lexpr, rexpr), - a->location); -} - -static Node * -transformAExprOr(ParseState *pstate, A_Expr *a) -{ - Node *lexpr = transformExprRecurse(pstate, a->lexpr); - Node *rexpr = transformExprRecurse(pstate, a->rexpr); - - lexpr = coerce_to_boolean(pstate, lexpr, "OR"); - rexpr = coerce_to_boolean(pstate, rexpr, "OR"); - - return (Node *) makeBoolExpr(OR_EXPR, - list_make2(lexpr, rexpr), - a->location); -} - -static Node * -transformAExprNot(ParseState *pstate, A_Expr *a) -{ - Node *rexpr = transformExprRecurse(pstate, a->rexpr); - - rexpr = coerce_to_boolean(pstate, rexpr, "NOT"); - - return (Node *) makeBoolExpr(NOT_EXPR, - list_make1(rexpr), - a->location); -} - static Node * transformAExprOpAny(ParseState *pstate, A_Expr *a) { @@ -1237,6 +1189,42 @@ transformAExprIn(ParseState *pstate, A_Expr *a) return result; } +static Node * +transformBoolExpr(ParseState *pstate, BoolExpr *a) +{ + List *args = NIL; + const char *opname; + ListCell *lc; + + switch (a->boolop) + { + case AND_EXPR: + opname = "AND"; + break; + case OR_EXPR: + opname = "OR"; + break; + case NOT_EXPR: + opname = "NOT"; + break; + default: + elog(ERROR, "unrecognized boolop: %d", (int) a->boolop); + opname = NULL; /* keep compiler quiet */ + break; + } + + foreach(lc, a->args) + { + Node *arg = (Node *) lfirst(lc); + + arg = transformExprRecurse(pstate, arg); + arg = coerce_to_boolean(pstate, arg, opname); + args = lappend(args, arg); + } + + return (Node *) makeBoolExpr(a->boolop, args, a->location); +} + static Node * transformFuncCall(ParseState *pstate, FuncCall *fn) { @@ -2428,10 +2416,6 @@ make_row_comparison_op(ParseState *pstate, List *opname, /* * For = and <> cases, we just combine the pairwise operators with AND or * OR respectively. - * - * Note: this is presently the only place where the parser generates - * BoolExpr with more than two arguments. Should be OK since the rest of - * the system thinks BoolExpr is N-argument anyway. */ if (rctype == ROWCOMPARE_EQ) return (Node *) makeBoolExpr(AND_EXPR, opexprs, location); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 7e560a19a3..9a68c87d0a 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -225,9 +225,6 @@ typedef struct ParamRef typedef enum A_Expr_Kind { AEXPR_OP, /* normal operator */ - AEXPR_AND, /* booleans - name field is unused */ - AEXPR_OR, - AEXPR_NOT, AEXPR_OP_ANY, /* scalar op ANY (array) */ AEXPR_OP_ALL, /* scalar op ALL (array) */ AEXPR_DISTINCT, /* IS DISTINCT FROM - name must be "=" */ diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 4f03ef9232..db8e87f0d0 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -458,12 +458,8 @@ typedef struct ScalarArrayOpExpr * BoolExpr - expression node for the basic Boolean operators AND, OR, NOT * * Notice the arguments are given as a List. For NOT, of course the list - * must always have exactly one element. For AND and OR, the executor can - * handle any number of arguments. The parser generally treats AND and OR - * as binary and so it typically only produces two-element lists, but the - * optimizer will flatten trees of AND and OR nodes to produce longer lists - * when possible. There are also a few special cases where more arguments - * can appear before optimization. + * must always have exactly one element. For AND and OR, there can be two + * or more arguments. */ typedef enum BoolExprType { diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 87870cf1af..ca56b47618 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2117,7 +2117,7 @@ shoe_ready| SELECT rsh.shoename, int4smaller(rsh.sh_avail, rsl.sl_avail) AS total_avail FROM shoe rsh, shoelace rsl - WHERE (((rsl.sl_color = rsh.slcolor) AND (rsl.sl_len_cm >= rsh.slminlen_cm)) AND (rsl.sl_len_cm <= rsh.slmaxlen_cm)); + WHERE ((rsl.sl_color = rsh.slcolor) AND (rsl.sl_len_cm >= rsh.slminlen_cm) AND (rsl.sl_len_cm <= rsh.slmaxlen_cm)); shoelace| SELECT s.sl_name, s.sl_avail, s.sl_color, -- 2.40.0