Update comments about pathkeys.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 13 Aug 1999 01:17:16 +0000 (01:17 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 13 Aug 1999 01:17:16 +0000 (01:17 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 13 Aug 1999 01:17:16 +0000 (01:17 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 13 Aug 1999 01:17:16 +0000 (01:17 +0000)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index cf9f87faf7a536dc89e5a463be079586788bf33a..c0782c5665b1cf4f7ed310336ee915cc894da11b 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.12 1999/07/16 04:59:15 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.13 1999/08/13 01:17:16 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -33,18 +33,24 @@ static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
   *     order of the result generated by the Path.
   *
   *     In single/base relation RelOptInfo's, the Path's represent various ways
- *     of generating the relation and the resulting ordering of the tuples.
+ *     of scanning the relation and the resulting ordering of the tuples.
   *     Sequential scan Paths have NIL pathkeys, indicating no known ordering.
- *     Index scans have Path.pathkeys that represent the chosen index.
- *     A single-key index pathkeys would be { {tab1_indexkey1} }.      For a
- *     multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} },
- *     indicating major sort by indexkey1 and minor sort by indexkey2.
+ *     Index scans have Path.pathkeys that represent the chosen index's ordering,
+ *     if any.  A single-key index would create a pathkey with a single sublist,
+ *     e.g. ( (tab1_indexkey1) ).  A multi-key index generates a sublist per key,
+ *     e.g. ( (tab1_indexkey1) (tab1_indexkey2) ) which shows major sort by
+ *     indexkey1 and minor sort by indexkey2.
+ *
+ *     Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
+ *     we can say nothing about the overall order of its result.  Also, an index
+ *     scan on an unordered type of index generates no useful pathkeys.  However,
+ *     we can always create a pathkey by doing an explicit sort.
   *
   *     Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
   *     only performed with equijoins ("=").  Because of this, the multi-relation
   *     path actually has more than one primary Var key.  For example, a
- *     mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
- *     { {tab1.col1, tab2.col1} }, indicating that the major sort order of the
+ *     mergejoin Path of "tab1.col1 = tab2.col1" would generate pathkeys of
+ *     ( (tab1.col1 tab2.col1) ), indicating that the major sort order of the
   *     Path can be taken to be *either* tab1.col1 or tab2.col1.
   *     They are equal, so they are both primary sort keys.  This allows future
   *     joins to use either Var as a pre-sorted key to prevent upper Mergejoins
@@ -53,21 +59,30 @@ static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
   *     Note that while the order of the top list is meaningful (primary vs.
   *     secondary sort key), the order of each sublist is arbitrary.
   *
- *     For multi-key sorts, if the outer is sorted by a multi-key index, the
- *     multi-key index remains after the join.  If the inner has a multi-key
- *     sort, only the primary key of the inner is added to the result.
- *     Mergejoins only join on the primary key.  Currently, non-primary keys
- *     in the pathkeys List are of limited value.
+ *     We can actually keep all of the keys of the outer path of a merge or
+ *     nestloop join, since the ordering of the outer path will be reflected
+ *     in the result.  We add to each pathkey sublist any inner vars that are
+ *     equijoined to any of the outer vars in the sublist.  In the nestloop
+ *     case we have to be careful to consider only equijoin operators; the
+ *     nestloop's join clauses might include non-equijoin operators.
+ *     (Currently, we do this by considering only mergejoinable operators while
+ *     making the pathkeys, since we have no separate marking for operators that
+ *     are equijoins but aren't mergejoinable.)
   *
   *     Although Hashjoins also work only with equijoin operators, it is *not*
   *     safe to consider the output of a Hashjoin to be sorted in any particular
   *     order --- not even the outer path's order.  This is true because the
- *     executor might have to split the join into multiple batches.
+ *     executor might have to split the join into multiple batches.  Therefore
+ *     a Hashjoin is always given NIL pathkeys.
   *
- *     NestJoin does not perform sorting, and allows non-equijoins, so it does
- *     not allow useful pathkeys.      (But couldn't we use the outer path's order?)
+ *     Notice that pathkeys only say *what* is being ordered, and not *how*
+ *     it is ordered.  The actual sort ordering is indicated by a separate
+ *     data structure, the PathOrder.  The PathOrder provides a sort operator
+ *     OID for each of the sublists of the path key.  This is fairly bogus,
+ *     since in cross-datatype cases we really want to keep track of more than
+ *     one sort operator...
   *
- *     -- bjm
+ *     -- bjm & tgl
   *--------------------
   */
  
@@ -328,17 +343,32 @@ make_pathkeys_from_joinkeys(List *joinkeys,
  
  /*
   * new_join_pathkeys
- *       Find the path keys for a join relation by finding all vars in the list
- *       of join clauses 'joinclauses' such that:
- *             (1) the var corresponding to the outer join relation is a
- *                     key on the outer path
- *             (2) the var appears in the target list of the join relation
- *       In other words, add to each outer path key the inner path keys that
- *       are required for qualification.
+ *       Build the path keys for a join relation constructed by mergejoin or
+ *       nestloop join.  These keys should include all the path key vars of the
+ *       outer path (since the join will retain the ordering of the outer path)
+ *       plus any vars of the inner path that are mergejoined to the outer vars.
+ *
+ *       Per the discussion at the top of this file, mergejoined inner vars
+ *       can be considered path keys of the result, just the same as the outer
+ *       vars they were joined with.
+ *
+ *       We can also use inner path vars as pathkeys of a nestloop join, but we
+ *       must be careful that we only consider equijoin clauses and not general
+ *       join clauses.  For example, "t1.a < t2.b" might be a join clause of a
+ *       nestloop, but it doesn't result in b acquiring the ordering of a!
+ *       joinpath.c handles that problem by only passing this routine clauses
+ *       that are marked mergejoinable, even if a nestloop join is being built.
+ *       Therefore we only have 't1.a = t2.b' style clauses, and can expect that
+ *       the inner var will acquire the outer's ordering no matter which join
+ *       method is actually used.
+ *
+ * All vars in the result are copied from the join relation's tlist, not from
+ * the given pathkeys or the join clauses.  (Is that necessary?  I suspect
+ * not --- tgl)
   *
   * 'outer_pathkeys' is the list of the outer path's path keys
   * 'join_rel_tlist' is the target list of the join relation
- * 'joinclauses' is the list of restricting join clauses
+ * 'joinclauses' is the list of mergejoinable join clauses
   *
   * Returns the list of new path keys.
   *
@@ -358,8 +388,13 @@ new_join_pathkeys(List *outer_pathkeys,
  
                 new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
                                                                            joinclauses);
-               if (new_pathkey != NIL)
-                       final_pathkeys = lappend(final_pathkeys, new_pathkey);
+               /* if we can find no sortable vars for the n'th sort key,
+                * then we're done generating pathkeys; can't expect to order
+                * subsequent vars.  Not clear that this can really happen.
+                */
+               if (new_pathkey == NIL)
+                       break;
+               final_pathkeys = lappend(final_pathkeys, new_pathkey);
         }
         return final_pathkeys;
  }
@@ -372,7 +407,7 @@ new_join_pathkeys(List *outer_pathkeys,
   *       at the top of this file).
   *
   *       Note that each returned pathkey is the var node found in
- *       'join_rel_tlist' rather than the joinclause var node.
+ *       'join_rel_tlist' rather than the input pathkey or joinclause var node.
   *       (Is this important?)  Also, we return a fully copied list
   *       that does not share any subnodes with existing data structures.
   *       (Is that important, either?)
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 13 Aug 1999 01:17:16 +0000 (01:17 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 13 Aug 1999 01:17:16 +0000 (01:17 +0000)