Fix mishandling of tSRFs at different nesting levels.

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 2 Feb 2017 21:38:13 +0000 (16:38 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 2 Feb 2017 21:38:18 +0000 (16:38 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Feb 2017 21:38:13 +0000 (16:38 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Feb 2017 21:38:18 +0000 (16:38 -0500)
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c

index cca5db88e2f1fd747c7839ce2794a97db08e8cae..5728f70c8b07cd6c3188a521939da98ef4f5e815 100644 (file)
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -20,10 +20,21 @@
  #include "optimizer/tlist.h"
  
  
+/* Test if an expression node represents a SRF call.  Beware multiple eval! */
+#define IS_SRF_CALL(node) \
+       ((IsA(node, FuncExpr) && ((FuncExpr *) (node))->funcretset) || \
+        (IsA(node, OpExpr) && ((OpExpr *) (node))->opretset))
+
+/* Workspace for split_pathtarget_walker */
  typedef struct
  {
-       List       *nextlevel_tlist;
-       bool            nextlevel_contains_srfs;
+       List       *input_target_exprs;         /* exprs available from input */
+       List       *level_srfs;         /* list of lists of SRF exprs */
+       List       *level_input_vars;           /* vars needed by SRFs of each level */
+       List       *level_input_srfs;           /* SRFs needed by SRFs of each level */
+       List       *current_input_vars;         /* vars needed in current subexpr */
+       List       *current_input_srfs;         /* SRFs needed in current subexpr */
+       int                     current_depth;  /* max SRF depth in current subexpr */
  } split_pathtarget_context;
  
  static bool split_pathtarget_walker(Node *node,
@@ -799,24 +810,27 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)
   *             x, y, z
   * which satisfy the desired property.
   *
+ * Another example is
+ *             srf1(x), srf2(srf3(y))
+ * That must appear as-is in the top PathTarget, but below that we need
+ *             srf1(x), srf3(y)
+ * That is, each SRF must be computed at a level corresponding to the nesting
+ * depth of SRFs within its arguments.
+ *
   * In some cases, a SRF has already been evaluated in some previous plan level
   * and we shouldn't expand it again (that is, what we see in the target is
   * already meant as a reference to a lower subexpression).  So, don't expand
   * any tlist expressions that appear in input_target, if that's not NULL.
- * In principle we might need to consider matching subexpressions to
- * input_target, but for now it's not necessary because only ORDER BY and
- * GROUP BY expressions are at issue and those will look the same at both
- * plan levels.
   *
   * The outputs of this function are two parallel lists, one a list of
   * PathTargets and the other an integer list of bool flags indicating
- * whether the corresponding PathTarget contains any top-level SRFs.
+ * whether the corresponding PathTarget contains any evaluatable SRFs.
   * The lists are given in the order they'd need to be evaluated in, with
   * the "lowest" PathTarget first.  So the last list entry is always the
   * originally given PathTarget, and any entries before it indicate evaluation
   * levels that must be inserted below it.  The first list entry must not
- * contain any SRFs, since it will typically be attached to a plan node
- * that cannot evaluate SRFs.
+ * contain any SRFs (other than ones duplicating input_target entries), since
+ * it will typically be attached to a plan node that cannot evaluate SRFs.
   *
   * Note: using a list for the flags may seem like overkill, since there
   * are only a few possible patterns for which levels contain SRFs.
@@ -827,133 +841,283 @@ split_pathtarget_at_srfs(PlannerInfo *root,
                                                  PathTarget *target, PathTarget *input_target,
                                                  List **targets, List **targets_contain_srfs)
  {
-       /* Initialize output lists to empty; we prepend to them within loop */
-       *targets = *targets_contain_srfs = NIL;
+       split_pathtarget_context context;
+       int                     max_depth;
+       bool            need_extra_projection;
+       List       *prev_level_tlist;
+       ListCell   *lc,
+                          *lc1,
+                          *lc2,
+                          *lc3;
  
-       /* Loop to consider each level of PathTarget we need */
-       for (;;)
+       /*
+        * It's not unusual for planner.c to pass us two physically identical
+        * targets, in which case we can conclude without further ado that all
+        * expressions are available from the input.  (The logic below would
+        * arrive at the same conclusion, but much more tediously.)
+        */
+       if (target == input_target)
         {
-               bool            target_contains_srfs = false;
-               split_pathtarget_context context;
-               ListCell   *lc;
+               *targets = list_make1(target);
+               *targets_contain_srfs = list_make1_int(false);
+               return;
+       }
+
+       /* Pass any input_target exprs down to split_pathtarget_walker() */
+       context.input_target_exprs = input_target ? input_target->exprs : NIL;
+
+       /*
+        * Initialize with empty level-zero lists, and no levels after that.
+        * (Note: we could dispense with representing level zero explicitly, since
+        * it will never receive any SRFs, but then we'd have to special-case that
+        * level when we get to building result PathTargets.  Level zero describes
+        * the SRF-free PathTarget that will be given to the input plan node.)
+        */
+       context.level_srfs = list_make1(NIL);
+       context.level_input_vars = list_make1(NIL);
+       context.level_input_srfs = list_make1(NIL);
  
-               context.nextlevel_tlist = NIL;
-               context.nextlevel_contains_srfs = false;
+       /* Initialize data we'll accumulate across all the target expressions */
+       context.current_input_vars = NIL;
+       context.current_input_srfs = NIL;
+       max_depth = 0;
+       need_extra_projection = false;
+
+       /* Scan each expression in the PathTarget looking for SRFs */
+       foreach(lc, target->exprs)
+       {
+               Node       *node = (Node *) lfirst(lc);
  
                 /*
-                * Scan the PathTarget looking for SRFs.  Top-level SRFs are handled
-                * in this loop, ones lower down are found by split_pathtarget_walker.
+                * Find all SRFs and Vars (and Var-like nodes) in this expression, and
+                * enter them into appropriate lists within the context struct.
                  */
-               foreach(lc, target->exprs)
+               context.current_depth = 0;
+               split_pathtarget_walker(node, &context);
+
+               /* An expression containing no SRFs is of no further interest */
+               if (context.current_depth == 0)
+                       continue;
+
+               /*
+                * Track max SRF nesting depth over the whole PathTarget.  Also, if
+                * this expression establishes a new max depth, we no longer care
+                * whether previous expressions contained nested SRFs; we can handle
+                * any required projection for them in the final ProjectSet node.
+                */
+               if (max_depth < context.current_depth)
                 {
-                       Node       *node = (Node *) lfirst(lc);
+                       max_depth = context.current_depth;
+                       need_extra_projection = false;
+               }
+
+               /*
+                * If any maximum-depth SRF is not at the top level of its expression,
+                * we'll need an extra Result node to compute the top-level scalar
+                * expression.
+                */
+               if (max_depth == context.current_depth && !IS_SRF_CALL(node))
+                       need_extra_projection = true;
+       }
+
+       /*
+        * If we found no SRFs needing evaluation (maybe they were all present in
+        * input_target, or maybe they were all removed by const-simplification),
+        * then no ProjectSet is needed; fall out.
+        */
+       if (max_depth == 0)
+       {
+               *targets = list_make1(target);
+               *targets_contain_srfs = list_make1_int(false);
+               return;
+       }
+
+       /*
+        * The Vars and SRF outputs needed at top level can be added to the last
+        * level_input lists if we don't need an extra projection step.  If we do
+        * need one, add a SRF-free level to the lists.
+        */
+       if (need_extra_projection)
+       {
+               context.level_srfs = lappend(context.level_srfs, NIL);
+               context.level_input_vars = lappend(context.level_input_vars,
+                                                                                  context.current_input_vars);
+               context.level_input_srfs = lappend(context.level_input_srfs,
+                                                                                  context.current_input_srfs);
+       }
+       else
+       {
+               lc = list_nth_cell(context.level_input_vars, max_depth);
+               lfirst(lc) = list_concat(lfirst(lc), context.current_input_vars);
+               lc = list_nth_cell(context.level_input_srfs, max_depth);
+               lfirst(lc) = list_concat(lfirst(lc), context.current_input_srfs);
+       }
+
+       /*
+        * Now construct the output PathTargets.  The original target can be used
+        * as-is for the last one, but we need to construct a new SRF-free target
+        * representing what the preceding plan node has to emit, as well as a
+        * target for each intermediate ProjectSet node.
+        */
+       *targets = *targets_contain_srfs = NIL;
+       prev_level_tlist = NIL;
+
+       forthree(lc1, context.level_srfs,
+                        lc2, context.level_input_vars,
+                        lc3, context.level_input_srfs)
+       {
+               List       *level_srfs = (List *) lfirst(lc1);
+               PathTarget *ntarget;
+
+               if (lnext(lc1) == NULL)
+               {
+                       ntarget = target;
+               }
+               else
+               {
+                       ntarget = create_empty_pathtarget();
  
                         /*
-                        * A tlist item that is just a reference to an expression already
-                        * computed in input_target need not be evaluated here, so just
-                        * make sure it's included in the next PathTarget.
+                        * This target should actually evaluate any SRFs of the current
+                        * level, and it needs to propagate forward any Vars needed by
+                        * later levels, as well as SRFs computed earlier and needed by
+                        * later levels.  We rely on add_new_columns_to_pathtarget() to
+                        * remove duplicate items.  Also, for safety, make a separate copy
+                        * of each item for each PathTarget.
                          */
-                       if (input_target && list_member(input_target->exprs, node))
+                       add_new_columns_to_pathtarget(ntarget, copyObject(level_srfs));
+                       for_each_cell(lc, lnext(lc2))
                         {
-                               context.nextlevel_tlist = lappend(context.nextlevel_tlist, node);
-                               continue;
-                       }
+                               List       *input_vars = (List *) lfirst(lc);
  
-                       /* Else, we need to compute this expression. */
-                       if (IsA(node, FuncExpr) &&
-                               ((FuncExpr *) node)->funcretset)
-                       {
-                               /* Top-level SRF: it can be evaluated here */
-                               target_contains_srfs = true;
-                               /* Recursively examine SRF's inputs */
-                               split_pathtarget_walker((Node *) ((FuncExpr *) node)->args,
-                                                                               &context);
+                               add_new_columns_to_pathtarget(ntarget, copyObject(input_vars));
                         }
-                       else if (IsA(node, OpExpr) &&
-                                        ((OpExpr *) node)->opretset)
+                       for_each_cell(lc, lnext(lc3))
                         {
-                               /* Same as above, but for set-returning operator */
-                               target_contains_srfs = true;
-                               split_pathtarget_walker((Node *) ((OpExpr *) node)->args,
-                                                                               &context);
-                       }
-                       else
-                       {
-                               /* Not a top-level SRF, so recursively examine expression */
-                               split_pathtarget_walker(node, &context);
+                               List       *input_srfs = (List *) lfirst(lc);
+                               ListCell   *lcx;
+
+                               foreach(lcx, input_srfs)
+                               {
+                                       Node       *srf = (Node *) lfirst(lcx);
+
+                                       if (list_member(prev_level_tlist, srf))
+                                               add_new_column_to_pathtarget(ntarget, copyObject(srf));
+                               }
                         }
+                       set_pathtarget_cost_width(root, ntarget);
                 }
  
                 /*
-                * Prepend current target and associated flag to output lists.
+                * Add current target and does-it-compute-SRFs flag to output lists.
                  */
-               *targets = lcons(target, *targets);
-               *targets_contain_srfs = lcons_int(target_contains_srfs,
-                                                                                 *targets_contain_srfs);
+               *targets = lappend(*targets, ntarget);
+               *targets_contain_srfs = lappend_int(*targets_contain_srfs,
+                                                                                       (level_srfs != NIL));
  
-               /*
-                * Done if we found no SRFs anywhere in this target; the tentative
-                * tlist we built for the next level can be discarded.
-                */
-               if (!target_contains_srfs && !context.nextlevel_contains_srfs)
-                       break;
-
-               /*
-                * Else build the next PathTarget down, and loop back to process it.
-                * Copy the subexpressions to make sure PathTargets don't share
-                * substructure (might be unnecessary, but be safe); and drop any
-                * duplicate entries in the sub-targetlist.
-                */
-               target = create_empty_pathtarget();
-               add_new_columns_to_pathtarget(target,
-                                                          (List *) copyObject(context.nextlevel_tlist));
-               set_pathtarget_cost_width(root, target);
+               /* Remember this level's output for next pass */
+               prev_level_tlist = ntarget->exprs;
         }
  }
  
-/* Recursively examine expressions for split_pathtarget_at_srfs */
+/*
+ * Recursively examine expressions for split_pathtarget_at_srfs.
+ *
+ * Note we make no effort here to prevent duplicate entries in the output
+ * lists.  Duplicates will be gotten rid of later.
+ */
  static bool
  split_pathtarget_walker(Node *node, split_pathtarget_context *context)
  {
         if (node == NULL)
                 return false;
+
+       /*
+        * A subexpression that matches an expression already computed in
+        * input_target can be treated like a Var (which indeed it will be after
+        * setrefs.c gets done with it), even if it's actually a SRF.  Record it
+        * as being needed for the current expression, and ignore any
+        * substructure.
+        */
+       if (list_member(context->input_target_exprs, node))
+       {
+               context->current_input_vars = lappend(context->current_input_vars,
+                                                                                         node);
+               return false;
+       }
+
+       /*
+        * Vars and Var-like constructs are expected to be gotten from the input,
+        * too.  We assume that these constructs cannot contain any SRFs (if one
+        * does, there will be an executor failure from a misplaced SRF).
+        */
         if (IsA(node, Var) ||
                 IsA(node, PlaceHolderVar) ||
                 IsA(node, Aggref) ||
                 IsA(node, GroupingFunc) ||
                 IsA(node, WindowFunc))
         {
-               /*
-                * Pass these items down to the child plan level for evaluation.
-                *
-                * We assume that these constructs cannot contain any SRFs (if one
-                * does, there will be an executor failure from a misplaced SRF).
-                */
-               context->nextlevel_tlist = lappend(context->nextlevel_tlist, node);
-
-               /* Having done that, we need not examine their sub-structure */
+               context->current_input_vars = lappend(context->current_input_vars,
+                                                                                         node);
                 return false;
         }
-       else if ((IsA(node, FuncExpr) &&
-                         ((FuncExpr *) node)->funcretset) ||
-                        (IsA(node, OpExpr) &&
-                         ((OpExpr *) node)->opretset))
+
+       /*
+        * If it's a SRF, recursively examine its inputs, determine its level, and
+        * make appropriate entries in the output lists.
+        */
+       if (IS_SRF_CALL(node))
         {
+               List       *save_input_vars = context->current_input_vars;
+               List       *save_input_srfs = context->current_input_srfs;
+               int                     save_current_depth = context->current_depth;
+               int                     srf_depth;
+               ListCell   *lc;
+
+               context->current_input_vars = NIL;
+               context->current_input_srfs = NIL;
+               context->current_depth = 0;
+
+               (void) expression_tree_walker(node, split_pathtarget_walker,
+                                                                         (void *) context);
+
+               /* Depth is one more than any SRF below it */
+               srf_depth = context->current_depth + 1;
+
+               /* If new record depth, initialize another level of output lists */
+               if (srf_depth >= list_length(context->level_srfs))
+               {
+                       context->level_srfs = lappend(context->level_srfs, NIL);
+                       context->level_input_vars = lappend(context->level_input_vars, NIL);
+                       context->level_input_srfs = lappend(context->level_input_srfs, NIL);
+               }
+
+               /* Record this SRF as needing to be evaluated at appropriate level */
+               lc = list_nth_cell(context->level_srfs, srf_depth);
+               lfirst(lc) = lappend(lfirst(lc), node);
+
+               /* Record its inputs as being needed at the same level */
+               lc = list_nth_cell(context->level_input_vars, srf_depth);
+               lfirst(lc) = list_concat(lfirst(lc), context->current_input_vars);
+               lc = list_nth_cell(context->level_input_srfs, srf_depth);
+               lfirst(lc) = list_concat(lfirst(lc), context->current_input_srfs);
+
                 /*
-                * Pass SRFs down to the child plan level for evaluation, and mark
-                * that it contains SRFs.  (We are not at top level of our own tlist,
-                * else this would have been picked up by split_pathtarget_at_srfs.)
+                * Restore caller-level state and update it for presence of this SRF.
+                * Notice we report the SRF itself as being needed for evaluation of
+                * surrounding expression.
                  */
-               context->nextlevel_tlist = lappend(context->nextlevel_tlist, node);
-               context->nextlevel_contains_srfs = true;
+               context->current_input_vars = save_input_vars;
+               context->current_input_srfs = lappend(save_input_srfs, node);
+               context->current_depth = Max(save_current_depth, srf_depth);
  
-               /* Inputs to the SRF need not be considered here, so we're done */
+               /* We're done here */
                 return false;
         }
  
         /*
-        * Otherwise, the node is evaluatable within the current PathTarget, so
-        * recurse to examine its inputs.
+        * Otherwise, the node is a scalar (non-set) expression, so recurse to
+        * examine its inputs.
          */
         return expression_tree_walker(node, split_pathtarget_walker,
                                                                   (void *) context);
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out

index 47afdc335e1d562d8a510e84fa2ae2553e92fbb3..1ad4ad47b2c011c770f8ac823d05e04f16c63952 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -853,7 +853,7 @@ select * from int4_tbl o where (f1, f1) in
                 ->  Result
                       Output: i.f1, ((generate_series(1, 2)) / 10)
                       ->  ProjectSet
-                           Output: i.f1, generate_series(1, 2)
+                           Output: generate_series(1, 2), i.f1
                             ->  HashAggregate
                                   Output: i.f1
                                   Group Key: i.f1
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out

index dca51a842faab6c82df11ad2de69d5bcb93238e6..0eeaf9e8300d335003369abf52f714311d5d7554 100644 (file)
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -53,6 +53,29 @@ SELECT generate_series(generate_series(1,3), generate_series(2, 4));
                 4
  (6 rows)
  
+-- check proper nesting of SRFs in different expressions
+explain (verbose, costs off)
+SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4);
+                                   QUERY PLAN                                   
+--------------------------------------------------------------------------------
+ ProjectSet
+   Output: generate_series(1, (generate_series(1, 3))), (generate_series(2, 4))
+   ->  ProjectSet
+         Output: generate_series(1, 3), generate_series(2, 4)
+         ->  Result
+(5 rows)
+
+SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4);
+ generate_series | generate_series 
+-----------------+-----------------
+               1 |               2
+               1 |               3
+               2 |               3
+               1 |               4
+               2 |               4
+               3 |               4
+(6 rows)
+
  CREATE TABLE few(id int, dataa text, datab text);
  INSERT INTO few VALUES(1, 'a', 'foo'),(2, 'a', 'bar'),(3, 'b', 'bar');
  -- SRF output order of sorting is maintained, if SRF is not referenced
@@ -518,6 +541,69 @@ SELECT |@|ARRAY[1,2,3];
          3
  (3 rows)
  
+-- Some fun cases involving duplicate SRF calls
+explain (verbose, costs off)
+select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Result
+   Output: (generate_series(1, 3)), ((generate_series(1, 3)) + 1)
+   ->  ProjectSet
+         Output: generate_series(1, 3)
+         ->  Result
+(5 rows)
+
+select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1;
+ x | xp1 
+---+-----
+ 1 |   2
+ 2 |   3
+ 3 |   4
+(3 rows)
+
+explain (verbose, costs off)
+select generate_series(1,3)+1 order by generate_series(1,3);
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Sort
+   Output: (((generate_series(1, 3)) + 1)), (generate_series(1, 3))
+   Sort Key: (generate_series(1, 3))
+   ->  Result
+         Output: ((generate_series(1, 3)) + 1), (generate_series(1, 3))
+         ->  ProjectSet
+               Output: generate_series(1, 3)
+               ->  Result
+(8 rows)
+
+select generate_series(1,3)+1 order by generate_series(1,3);
+ ?column? 
+----------
+        2
+        3
+        4
+(3 rows)
+
+-- Check that SRFs of same nesting level run in lockstep
+explain (verbose, costs off)
+select generate_series(1,3) as x, generate_series(3,6) + 1 as y;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Result
+   Output: (generate_series(1, 3)), ((generate_series(3, 6)) + 1)
+   ->  ProjectSet
+         Output: generate_series(1, 3), generate_series(3, 6)
+         ->  Result
+(5 rows)
+
+select generate_series(1,3) as x, generate_series(3,6) + 1 as y;
+ x | y 
+---+---
+ 1 | 4
+ 2 | 5
+ 3 | 6
+   | 7
+(4 rows)
+
  -- Clean up
  DROP TABLE few;
  DROP TABLE fewmore;
diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql

index ff2fa3eb9a97608b131d0f1e3bf0ecf8587eea13..e627bb99ed963352bf8158de512b1d7705ba2b83 100644 (file)
--- a/src/test/regress/sql/tsrf.sql
+++ b/src/test/regress/sql/tsrf.sql
@@ -17,6 +17,11 @@ SELECT generate_series(1, generate_series(1, 3));
  -- srf, with two SRF arguments
  SELECT generate_series(generate_series(1,3), generate_series(2, 4));
  
+-- check proper nesting of SRFs in different expressions
+explain (verbose, costs off)
+SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4);
+SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4);
+
  CREATE TABLE few(id int, dataa text, datab text);
  INSERT INTO few VALUES(1, 'a', 'foo'),(2, 'a', 'bar'),(3, 'b', 'bar');
  
@@ -129,6 +134,19 @@ SELECT (SELECT generate_series(1,3) LIMIT 1 OFFSET g.i) FROM generate_series(0,3
  CREATE OPERATOR |@| (PROCEDURE = unnest, RIGHTARG = ANYARRAY);
  SELECT |@|ARRAY[1,2,3];
  
+-- Some fun cases involving duplicate SRF calls
+explain (verbose, costs off)
+select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1;
+select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1;
+explain (verbose, costs off)
+select generate_series(1,3)+1 order by generate_series(1,3);
+select generate_series(1,3)+1 order by generate_series(1,3);
+
+-- Check that SRFs of same nesting level run in lockstep
+explain (verbose, costs off)
+select generate_series(1,3) as x, generate_series(3,6) + 1 as y;
+select generate_series(1,3) as x, generate_series(3,6) + 1 as y;
+
  -- Clean up
  DROP TABLE few;
  DROP TABLE fewmore;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 2 Feb 2017 21:38:13 +0000 (16:38 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 2 Feb 2017 21:38:18 +0000 (16:38 -0500)
src/backend/optimizer/util/tlist.c		patch \| blob \| history
src/test/regress/expected/subselect.out		patch \| blob \| history
src/test/regress/expected/tsrf.out		patch \| blob \| history
src/test/regress/sql/tsrf.sql		patch \| blob \| history