Order active window clauses for greater reuse of Sort nodes.

author Andrew Gierth <rhodiumtoad@postgresql.org>

Fri, 14 Sep 2018 16:35:42 +0000 (17:35 +0100)

committer Andrew Gierth <rhodiumtoad@postgresql.org>

Fri, 14 Sep 2018 16:35:42 +0000 (17:35 +0100)
author Andrew Gierth <rhodiumtoad@postgresql.org>
Fri, 14 Sep 2018 16:35:42 +0000 (17:35 +0100)
committer Andrew Gierth <rhodiumtoad@postgresql.org>
Fri, 14 Sep 2018 16:35:42 +0000 (17:35 +0100)
diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c

index f3e18007086bc4b27c9afd7516ba8cfc520708df..55fd4c359b1734109f9d237250fbb2b61c3e2ac6 100644 (file)
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -1011,8 +1011,11 @@ list_append_unique_oid(List *list, Oid datum)
   * via equal().
   *
   * This is almost the same functionality as list_union(), but list1 is
- * modified in-place rather than being copied.  Note also that list2's cells
- * are not inserted in list1, so the analogy to list_concat() isn't perfect.
+ * modified in-place rather than being copied. However, callers of this
+ * function may have strict ordering expectations -- i.e. that the relative
+ * order of those list2 elements that are not duplicates is preserved. Note
+ * also that list2's cells are not inserted in list1, so the analogy to
+ * list_concat() isn't perfect.
   */
  List *
  list_concat_unique(List *list1, List *list2)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index e589471fee8b9b822ef3ed568c4baa0aa0875193..22c010c19e08612d89f687296c6b5a52f72ba6db 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -110,6 +110,17 @@ typedef struct
         int                *tleref_to_colnum_map;
  } grouping_sets_data;
  
+/*
+ * Temporary structure for use during WindowClause reordering in order to be
+ * be able to sort WindowClauses on partitioning/ordering prefix.
+ */
+typedef struct
+{
+       WindowClause *wc;
+       List       *uniqueOrder;        /* A List of unique ordering/partitioning
+                                                                * clauses per Window */
+} WindowClauseSortData;
+
  /* Local functions */
  static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
  static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -236,6 +247,7 @@ static void create_partitionwise_grouping_paths(PlannerInfo *root,
  static bool group_by_has_partkey(RelOptInfo *input_rel,
                                          List *targetList,
                                          List *groupClause);
+static int     common_prefix_cmp(const void *a, const void *b);
  
  
  /*****************************************************************************
@@ -5259,67 +5271,119 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
  static List *
  select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
  {
-       List       *result;
-       List       *actives;
+       List       *windowClause = root->parse->windowClause;
+       List       *result = NIL;
         ListCell   *lc;
+       int                     nActive = 0;
+       WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
+                                                                                  * list_length(windowClause));
  
-       /* First, make a list of the active windows */
-       actives = NIL;
-       foreach(lc, root->parse->windowClause)
+       /* First, construct an array of the active windows */
+       foreach(lc, windowClause)
         {
                 WindowClause *wc = lfirst_node(WindowClause, lc);
  
                 /* It's only active if wflists shows some related WindowFuncs */
                 Assert(wc->winref <= wflists->maxWinRef);
-               if (wflists->windowFuncs[wc->winref] != NIL)
-                       actives = lappend(actives, wc);
+               if (wflists->windowFuncs[wc->winref] == NIL)
+                       continue;
+
+               actives[nActive].wc = wc;       /* original clause */
+
+               /*
+                * For sorting, we want the list of partition keys followed by the
+                * list of sort keys. But pathkeys construction will remove duplicates
+                * between the two, so we can as well (even though we can't detect all
+                * of the duplicates, since some may come from ECs - that might mean
+                * we miss optimization chances here). We must, however, ensure that
+                * the order of entries is preserved with respect to the ones we do
+                * keep.
+                *
+                * partitionClause and orderClause had their own duplicates removed in
+                * parse analysis, so we're only concerned here with removing
+                * orderClause entries that also appear in partitionClause.
+                */
+               actives[nActive].uniqueOrder =
+                       list_concat_unique(list_copy(wc->partitionClause),
+                                                          wc->orderClause);
+               nActive++;
         }
  
         /*
-        * Now, ensure that windows with identical partitioning/ordering clauses
-        * are adjacent in the list.  This is required by the SQL standard, which
-        * says that only one sort is to be used for such windows, even if they
-        * are otherwise distinct (eg, different names or framing clauses).
+        * Sort active windows by their partitioning/ordering clauses, ignoring
+        * any framing clauses, so that the windows that need the same sorting are
+        * adjacent in the list. When we come to generate paths, this will avoid
+        * inserting additional Sort nodes.
          *
-        * There is room to be much smarter here, for example detecting whether
-        * one window's sort keys are a prefix of another's (so that sorting for
-        * the latter would do for the former), or putting windows first that
-        * match a sort order available for the underlying query.  For the moment
-        * we are content with meeting the spec.
-        */
-       result = NIL;
-       while (actives != NIL)
-       {
-               WindowClause *wc = linitial_node(WindowClause, actives);
-               ListCell   *prev;
-               ListCell   *next;
-
-               /* Move wc from actives to result */
-               actives = list_delete_first(actives);
-               result = lappend(result, wc);
-
-               /* Now move any matching windows from actives to result */
-               prev = NULL;
-               for (lc = list_head(actives); lc; lc = next)
-               {
-                       WindowClause *wc2 = lfirst_node(WindowClause, lc);
+        * This is how we implement a specific requirement from the SQL standard,
+        * which says that when two or more windows are order-equivalent (i.e.
+        * have matching partition and order clauses, even if their names or
+        * framing clauses differ), then all peer rows must be presented in the
+        * same order in all of them. If we allowed multiple sort nodes for such
+        * cases, we'd risk having the peer rows end up in different orders in
+        * equivalent windows due to sort instability. (See General Rule 4 of
+        * <window clause> in SQL2008 - SQL2016.)
+        *
+        * Additionally, if the entire list of clauses of one window is a prefix
+        * of another, put first the window with stronger sorting requirements.
+        * This way we will first sort for stronger window, and won't have to sort
+        * again for the weaker one.
+        */
+       qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
  
-                       next = lnext(lc);
-                       /* framing options are NOT to be compared here! */
-                       if (equal(wc->partitionClause, wc2->partitionClause) &&
-                               equal(wc->orderClause, wc2->orderClause))
-                       {
-                               actives = list_delete_cell(actives, lc, prev);
-                               result = lappend(result, wc2);
-                       }
-                       else
-                               prev = lc;
-               }
-       }
+       /* build ordered list of the original WindowClause nodes */
+       for (int i = 0; i < nActive; i++)
+               result = lappend(result, actives[i].wc);
+
+       pfree(actives);
  
         return result;
  }
  
+/*
+ * common_prefix_cmp
+ *       QSort comparison function for WindowClauseSortData
+ *
+ * Sort the windows by the required sorting clauses. First, compare the sort
+ * clauses themselves. Second, if one window's clauses are a prefix of another
+ * one's clauses, put the window with more sort clauses first.
+ */
+static int
+common_prefix_cmp(const void *a, const void *b)
+{
+       const WindowClauseSortData *wcsa = a;
+       const WindowClauseSortData *wcsb = b;
+       ListCell   *item_a;
+       ListCell   *item_b;
+
+       forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
+       {
+               SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
+               SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
+
+               if (sca->tleSortGroupRef > scb->tleSortGroupRef)
+                       return -1;
+               else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
+                       return 1;
+               else if (sca->sortop > scb->sortop)
+                       return -1;
+               else if (sca->sortop < scb->sortop)
+                       return 1;
+               else if (sca->nulls_first && !scb->nulls_first)
+                       return -1;
+               else if (!sca->nulls_first && scb->nulls_first)
+                       return 1;
+               /* no need to compare eqop, since it is fully determined by sortop */
+       }
+
+       if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
+               return -1;
+       else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
+               return 1;
+
+       return 0;
+}
+
  /*
   * make_window_input_target
   *       Generate appropriate PathTarget for initial input to WindowAgg nodes.
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out

index 562006a2b82839a298ed60884db2d7ad53676681..662d34865309218137a4ac6f23a2420e40cb3084 100644 (file)
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -504,9 +504,9 @@ SELECT sum(salary),
  FROM empsalary GROUP BY depname;
    sum  | row_number |  sum  
  -------+------------+-------
- 14600 |          3 | 14600
-  7400 |          2 | 22000
   25100 |          1 | 47100
+  7400 |          2 | 22000
+ 14600 |          3 | 14600
  (3 rows)
  
  -- identical windows with different names
@@ -2994,9 +2994,9 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum(
  FROM empsalary GROUP BY depname;
    sum  | row_number | filtered_sum |  depname  
  -------+------------+--------------+-----------
- 14600 |          3 |              | sales
-  7400 |          2 |         3500 | personnel
   25100 |          1 |        22600 | develop
+  7400 |          2 |         3500 | personnel
+ 14600 |          3 |              | sales
  (3 rows)
  
  -- Test pushdown of quals into a subquery containing window functions
@@ -3008,13 +3008,13 @@ SELECT * FROM
            min(salary) OVER (PARTITION BY depname || 'A', depname) depminsalary
     FROM empsalary) emp
  WHERE depname = 'sales';
-                             QUERY PLAN                              
----------------------------------------------------------------------
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
   Subquery Scan on emp
     ->  WindowAgg
-         ->  Sort
-               Sort Key: (((empsalary.depname)::text || 'A'::text))
-               ->  WindowAgg
+         ->  WindowAgg
+               ->  Sort
+                     Sort Key: (((empsalary.depname)::text || 'A'::text))
                       ->  Seq Scan on empsalary
                             Filter: ((depname)::text = 'sales'::text)
  (7 rows)
@@ -3027,19 +3027,53 @@ SELECT * FROM
            min(salary) OVER (PARTITION BY depname) depminsalary
     FROM empsalary) emp
  WHERE depname = 'sales';
-                        QUERY PLAN                         
------------------------------------------------------------
+                      QUERY PLAN                       
+-------------------------------------------------------
   Subquery Scan on emp
     Filter: ((emp.depname)::text = 'sales'::text)
     ->  WindowAgg
           ->  Sort
-               Sort Key: empsalary.depname
+               Sort Key: empsalary.enroll_date
                 ->  WindowAgg
                       ->  Sort
-                           Sort Key: empsalary.enroll_date
+                           Sort Key: empsalary.depname
                             ->  Seq Scan on empsalary
  (9 rows)
  
+-- Test Sort node collapsing
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (SELECT depname,
+          sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+          min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary
+   FROM empsalary) emp
+WHERE depname = 'sales';
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Subquery Scan on emp
+   ->  WindowAgg
+         ->  WindowAgg
+               ->  Sort
+                     Sort Key: empsalary.empno, empsalary.enroll_date
+                     ->  Seq Scan on empsalary
+                           Filter: ((depname)::text = 'sales'::text)
+(7 rows)
+
+-- Test Sort node reordering
+EXPLAIN (COSTS OFF)
+SELECT
+  lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date),
+  lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno)
+FROM empsalary;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ WindowAgg
+   ->  WindowAgg
+         ->  Sort
+               Sort Key: depname, salary, enroll_date, empno
+               ->  Seq Scan on empsalary
+(5 rows)
+
  -- cleanup
  DROP TABLE empsalary;
  -- test user-defined window function with named args and default args
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql

index e2943a38f1e96c1befe504aae91d14a72bbd83b1..fc6d4cc903bb4c09b624acb0b987d61e16dde3fb 100644 (file)
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -892,6 +892,22 @@ SELECT * FROM
     FROM empsalary) emp
  WHERE depname = 'sales';
  
+-- Test Sort node collapsing
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (SELECT depname,
+          sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+          min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary
+   FROM empsalary) emp
+WHERE depname = 'sales';
+
+-- Test Sort node reordering
+EXPLAIN (COSTS OFF)
+SELECT
+  lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date),
+  lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno)
+FROM empsalary;
+
  -- cleanup
  DROP TABLE empsalary;
author	Andrew Gierth <rhodiumtoad@postgresql.org>
	Fri, 14 Sep 2018 16:35:42 +0000 (17:35 +0100)
committer	Andrew Gierth <rhodiumtoad@postgresql.org>
	Fri, 14 Sep 2018 16:35:42 +0000 (17:35 +0100)
src/backend/nodes/list.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/test/regress/expected/window.out		patch \| blob \| history
src/test/regress/sql/window.sql		patch \| blob \| history