]> granicus.if.org Git - postgresql/commitdiff
Disable physical tlist if any Var would need multiple sortgroupref labels.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 26 May 2016 18:52:24 +0000 (14:52 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 26 May 2016 18:52:30 +0000 (14:52 -0400)
As part of upper planner pathification (commit 3fc6e2d7f5b652b4) I redid
createplan.c's approach to the physical-tlist optimization, in which scan
nodes are allowed to return exactly the underlying table's columns so as
to save doing a projection step at runtime.  The logic was intentionally
more aggressive than before about applying the optimization, which is
generally a good thing, but Andres Freund found a case in which it got
too aggressive.  Namely, if any column is referenced more than once in
the parent plan node's sorting or grouping column list, we can't optimize
because then that column would need to have more than one ressortgroupref
label, and we only have space for one.

Add logic to detect this situation in use_physical_tlist(), and also add
some error checking in apply_pathtarget_labeling_to_tlist(), which this
example proves was being overly cavalier about whether what it was doing
made any sense.

The added test case exposes the problem only because we do not eliminate
duplicate grouping keys.  That might be something to fix someday, but it
doesn't seem like appropriate post-beta work.

Report: <20160526021235.w4nq7k3gnheg7vit@alap3.anarazel.de>

src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/tlist.c
src/test/regress/expected/select_distinct.out
src/test/regress/sql/select_distinct.sql

index 185f0625a78a75ebeac890bd4f1410d907bddcf3..bd19f43d586db497514c17630644788d0707a6d2 100644 (file)
@@ -787,10 +787,14 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
         * to emit any sort/group columns that are not simple Vars.  (If they are
         * simple Vars, they should appear in the physical tlist, and
         * apply_pathtarget_labeling_to_tlist will take care of getting them
-        * labeled again.)
+        * labeled again.)      We also have to check that no two sort/group columns
+        * are the same Var, else that element of the physical tlist would need
+        * conflicting ressortgroupref labels.
         */
        if ((flags & CP_LABEL_TLIST) && path->pathtarget->sortgrouprefs)
        {
+               Bitmapset  *sortgroupatts = NULL;
+
                i = 0;
                foreach(lc, path->pathtarget->exprs)
                {
@@ -799,7 +803,14 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
                        if (path->pathtarget->sortgrouprefs[i])
                        {
                                if (expr && IsA(expr, Var))
-                                        /* okay */ ;
+                               {
+                                       int                     attno = ((Var *) expr)->varattno;
+
+                                       attno -= FirstLowInvalidHeapAttributeNumber;
+                                       if (bms_is_member(attno, sortgroupatts))
+                                               return false;
+                                       sortgroupatts = bms_add_member(sortgroupatts, attno);
+                               }
                                else
                                        return false;
                        }
index aa2c2f890c0b4659d7b0dea85809a9e92b75d6b3..94825408b2a4449e6c336e90d625a3098417cba9 100644 (file)
@@ -736,17 +736,28 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)
                         * this allows us to deal with some cases where a set-returning
                         * function has been inlined, so that we now have more knowledge
                         * about what it returns than we did when the original Var was
-                        * created.  Otherwise, use regular equal() to see if there's a
-                        * matching TLE.  (In current usage, only the Var case is actually
-                        * needed; but it seems best to have sane behavior here for
-                        * non-Vars too.)
+                        * created.  Otherwise, use regular equal() to find the matching
+                        * TLE.  (In current usage, only the Var case is actually needed;
+                        * but it seems best to have sane behavior here for non-Vars too.)
                         */
                        if (expr && IsA(expr, Var))
                                tle = tlist_member_match_var((Var *) expr, tlist);
                        else
                                tle = tlist_member((Node *) expr, tlist);
-                       if (tle)
-                               tle->ressortgroupref = target->sortgrouprefs[i];
+
+                       /*
+                        * Complain if noplace for the sortgrouprefs label, or if we'd
+                        * have to label a column twice.  (The case where it already has
+                        * the desired label probably can't happen, but we may as well
+                        * allow for it.)
+                        */
+                       if (!tle)
+                               elog(ERROR, "ORDER/GROUP BY expression not found in targetlist");
+                       if (tle->ressortgroupref != 0 &&
+                               tle->ressortgroupref != target->sortgrouprefs[i])
+                               elog(ERROR, "targetlist item has multiple sortgroupref labels");
+
+                       tle->ressortgroupref = target->sortgrouprefs[i];
                }
                i++;
        }
index 38107a041332e5583235658ce941c7545b376627..f3696c6d1de2b4eec9b2199545a8de4e30d4991c 100644 (file)
@@ -124,6 +124,30 @@ SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
    8
 (20 rows)
 
+--
+-- Check mentioning same column more than once
+--
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Aggregate
+   Output: count(*)
+   ->  HashAggregate
+         Output: tenk1.two, tenk1.four, tenk1.two
+         Group Key: tenk1.two, tenk1.four, tenk1.two
+         ->  Seq Scan on public.tenk1
+               Output: tenk1.two, tenk1.four, tenk1.two
+(7 rows)
+
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss;
+ count 
+-------
+     4
+(1 row)
+
 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
index 328ba51c7a8ce1228b76cfa52d6a5fa1ababb1f3..a605e86449eb349ed25b73588a9c43630535f0de 100644 (file)
@@ -34,6 +34,17 @@ SELECT DISTINCT two, string4, ten
 --
 SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
 
+--
+-- Check mentioning same column more than once
+--
+
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss;
+
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss;
+
 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.