]> granicus.if.org Git - postgresql/blob - src/backend/parser/parse_clause.c
Represent type-specific length coercion functions as pg_cast entries,
[postgresql] / src / backend / parser / parse_clause.c
1 /*-------------------------------------------------------------------------
2  *
3  * parse_clause.c
4  *        handle clauses in parser
5  *
6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.133 2004/06/16 01:26:44 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #include "access/heapam.h"
19 #include "catalog/heap.h"
20 #include "nodes/makefuncs.h"
21 #include "optimizer/clauses.h"
22 #include "optimizer/tlist.h"
23 #include "optimizer/var.h"
24 #include "parser/analyze.h"
25 #include "parser/parsetree.h"
26 #include "parser/parse_clause.h"
27 #include "parser/parse_coerce.h"
28 #include "parser/parse_expr.h"
29 #include "parser/parse_oper.h"
30 #include "parser/parse_relation.h"
31 #include "parser/parse_target.h"
32 #include "parser/parse_type.h"
33 #include "rewrite/rewriteManip.h"
34 #include "utils/builtins.h"
35 #include "utils/guc.h"
36
37
38 #define ORDER_CLAUSE 0
39 #define GROUP_CLAUSE 1
40 #define DISTINCT_ON_CLAUSE 2
41
42 static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"};
43
44 static void extractRemainingColumns(List *common_colnames,
45                                                 List *src_colnames, List *src_colvars,
46                                                 List **res_colnames, List **res_colvars);
47 static Node *transformJoinUsingClause(ParseState *pstate,
48                                                  List *leftVars, List *rightVars);
49 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
50                                           List *containedRels);
51 static RangeTblRef *transformTableEntry(ParseState *pstate, RangeVar *r);
52 static RangeTblRef *transformRangeSubselect(ParseState *pstate,
53                                                 RangeSubselect *r);
54 static RangeTblRef *transformRangeFunction(ParseState *pstate,
55                                            RangeFunction *r);
56 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
57                                                 List **containedRels);
58 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
59                                    Var *l_colvar, Var *r_colvar);
60 static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
61                                         List **tlist, int clause);
62
63
64 /*
65  * transformFromClause -
66  *        Process the FROM clause and add items to the query's range table,
67  *        joinlist, and namespace.
68  *
69  * Note: we assume that pstate's p_rtable, p_joinlist, and p_namespace lists
70  * were initialized to NIL when the pstate was created.  We will add onto
71  * any entries already present --- this is needed for rule processing, as
72  * well as for UPDATE and DELETE.
73  *
74  * The range table may grow still further when we transform the expressions
75  * in the query's quals and target list. (This is possible because in
76  * POSTQUEL, we allowed references to relations not specified in the
77  * from-clause.  PostgreSQL keeps this extension to standard SQL.)
78  */
79 void
80 transformFromClause(ParseState *pstate, List *frmList)
81 {
82         ListCell   *fl;
83
84         /*
85          * The grammar will have produced a list of RangeVars,
86          * RangeSubselects, RangeFunctions, and/or JoinExprs. Transform each
87          * one (possibly adding entries to the rtable), check for duplicate
88          * refnames, and then add it to the joinlist and namespace.
89          */
90         foreach(fl, frmList)
91         {
92                 Node       *n = lfirst(fl);
93                 List       *containedRels;
94
95                 n = transformFromClauseItem(pstate, n, &containedRels);
96                 checkNameSpaceConflicts(pstate, (Node *) pstate->p_namespace, n);
97                 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
98                 pstate->p_namespace = lappend(pstate->p_namespace, n);
99         }
100 }
101
102 /*
103  * setTargetTable
104  *        Add the target relation of INSERT/UPDATE/DELETE to the range table,
105  *        and make the special links to it in the ParseState.
106  *
107  *        We also open the target relation and acquire a write lock on it.
108  *        This must be done before processing the FROM list, in case the target
109  *        is also mentioned as a source relation --- we want to be sure to grab
110  *        the write lock before any read lock.
111  *
112  *        If alsoSource is true, add the target to the query's joinlist and
113  *        namespace.  For INSERT, we don't want the target to be joined to;
114  *        it's a destination of tuples, not a source.   For UPDATE/DELETE,
115  *        we do need to scan or join the target.  (NOTE: we do not bother
116  *        to check for namespace conflict; we assume that the namespace was
117  *        initially empty in these cases.)
118  *
119  *        Finally, we mark the relation as requiring the permissions specified
120  *        by requiredPerms.
121  *
122  *        Returns the rangetable index of the target relation.
123  */
124 int
125 setTargetTable(ParseState *pstate, RangeVar *relation,
126                            bool inh, bool alsoSource, AclMode requiredPerms)
127 {
128         RangeTblEntry *rte;
129         int                     rtindex;
130
131         /* Close old target; this could only happen for multi-action rules */
132         if (pstate->p_target_relation != NULL)
133                 heap_close(pstate->p_target_relation, NoLock);
134
135         /*
136          * Open target rel and grab suitable lock (which we will hold till end
137          * of transaction).
138          *
139          * analyze.c will eventually do the corresponding heap_close(), but *not*
140          * release the lock.
141          */
142         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
143
144         /*
145          * Now build an RTE.
146          */
147         rte = addRangeTableEntry(pstate, relation, NULL, inh, false);
148         pstate->p_target_rangetblentry = rte;
149
150         /* assume new rte is at end */
151         rtindex = list_length(pstate->p_rtable);
152         Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
153
154         /*
155          * Override addRangeTableEntry's default ACL_SELECT permissions check,
156          * and instead mark target table as requiring exactly the specified
157          * permissions.
158          *
159          * If we find an explicit reference to the rel later during parse
160          * analysis, scanRTEForColumn will add the ACL_SELECT bit back again.
161          * That can't happen for INSERT but it is possible for UPDATE and DELETE.
162          */
163         rte->requiredPerms = requiredPerms;
164
165         /*
166          * If UPDATE/DELETE, add table to joinlist and namespace.
167          */
168         if (alsoSource)
169                 addRTEtoQuery(pstate, rte, true, true);
170
171         return rtindex;
172 }
173
174 /*
175  * Simplify InhOption (yes/no/default) into boolean yes/no.
176  *
177  * The reason we do things this way is that we don't want to examine the
178  * SQL_inheritance option flag until parse_analyze is run.      Otherwise,
179  * we'd do the wrong thing with query strings that intermix SET commands
180  * with queries.
181  */
182 bool
183 interpretInhOption(InhOption inhOpt)
184 {
185         switch (inhOpt)
186         {
187                 case INH_NO:
188                         return false;
189                 case INH_YES:
190                         return true;
191                 case INH_DEFAULT:
192                         return SQL_inheritance;
193         }
194         elog(ERROR, "bogus InhOption value: %d", inhOpt);
195         return false;                           /* keep compiler quiet */
196 }
197
198 /*
199  * Given an enum that indicates whether WITH / WITHOUT OIDS was
200  * specified by the user, return true iff the specified table/result
201  * set should be created with OIDs. This needs to be done after
202  * parsing the query string because the return value can depend upon
203  * the default_with_oids GUC var.
204  */
205 bool
206 interpretOidsOption(ContainsOids opt)
207 {
208         switch (opt)
209         {
210                 case MUST_HAVE_OIDS:
211                         return true;
212
213                 case MUST_NOT_HAVE_OIDS:
214                         return false;
215
216                 case DEFAULT_OIDS:
217                         return default_with_oids;
218         }
219
220         elog(ERROR, "bogus ContainsOids value: %d", opt);
221         return false;                           /* keep compiler quiet */
222 }
223
224 /*
225  * Extract all not-in-common columns from column lists of a source table
226  */
227 static void
228 extractRemainingColumns(List *common_colnames,
229                                                 List *src_colnames, List *src_colvars,
230                                                 List **res_colnames, List **res_colvars)
231 {
232         List       *new_colnames = NIL;
233         List       *new_colvars = NIL;
234         ListCell   *lnames, *lvars;
235
236         Assert(list_length(src_colnames) == list_length(src_colvars));
237
238         forboth(lnames, src_colnames, lvars, src_colvars)
239         {
240                 char       *colname = strVal(lfirst(lnames));
241                 bool            match = false;
242                 ListCell   *cnames;
243
244                 foreach(cnames, common_colnames)
245                 {
246                         char       *ccolname = strVal(lfirst(cnames));
247
248                         if (strcmp(colname, ccolname) == 0)
249                         {
250                                 match = true;
251                                 break;
252                         }
253                 }
254
255                 if (!match)
256                 {
257                         new_colnames = lappend(new_colnames, lfirst(lnames));
258                         new_colvars = lappend(new_colvars, lfirst(lvars));
259                 }
260         }
261
262         *res_colnames = new_colnames;
263         *res_colvars = new_colvars;
264 }
265
266 /* transformJoinUsingClause()
267  *        Build a complete ON clause from a partially-transformed USING list.
268  *        We are given lists of nodes representing left and right match columns.
269  *        Result is a transformed qualification expression.
270  */
271 static Node *
272 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
273 {
274         Node       *result = NULL;
275         ListCell   *lvars, *rvars;
276
277         /*
278          * We cheat a little bit here by building an untransformed operator
279          * tree whose leaves are the already-transformed Vars.  This is OK
280          * because transformExpr() won't complain about already-transformed
281          * subnodes.
282          */
283         forboth(lvars, leftVars, rvars, rightVars)
284         {
285                 Node       *lvar = (Node *) lfirst(lvars);
286                 Node       *rvar = (Node *) lfirst(rvars);
287                 A_Expr     *e;
288
289                 e = makeSimpleA_Expr(AEXPR_OP, "=", copyObject(lvar), copyObject(rvar));
290
291                 if (result == NULL)
292                         result = (Node *) e;
293                 else
294                 {
295                         A_Expr     *a;
296
297                         a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e);
298                         result = (Node *) a;
299                 }
300         }
301
302         /*
303          * Since the references are already Vars, and are certainly from the
304          * input relations, we don't have to go through the same pushups that
305          * transformJoinOnClause() does.  Just invoke transformExpr() to fix
306          * up the operators, and we're done.
307          */
308         result = transformExpr(pstate, result);
309
310         result = coerce_to_boolean(pstate, result, "JOIN/USING");
311
312         return result;
313 }
314
315 /* transformJoinOnClause()
316  *        Transform the qual conditions for JOIN/ON.
317  *        Result is a transformed qualification expression.
318  */
319 static Node *
320 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
321                                           List *containedRels)
322 {
323         Node       *result;
324         List       *save_namespace;
325         Relids          clause_varnos;
326         int                     varno;
327
328         /*
329          * This is a tad tricky, for two reasons.  First, the namespace that
330          * the join expression should see is just the two subtrees of the JOIN
331          * plus any outer references from upper pstate levels.  So,
332          * temporarily set this pstate's namespace accordingly.  (We need not
333          * check for refname conflicts, because transformFromClauseItem()
334          * already did.) NOTE: this code is OK only because the ON clause
335          * can't legally alter the namespace by causing implicit relation refs
336          * to be added.
337          */
338         save_namespace = pstate->p_namespace;
339         pstate->p_namespace = list_make2(j->larg, j->rarg);
340
341         result = transformWhereClause(pstate, j->quals, "JOIN/ON");
342
343         pstate->p_namespace = save_namespace;
344
345         /*
346          * Second, we need to check that the ON condition doesn't refer to any
347          * rels outside the input subtrees of the JOIN.  It could do that
348          * despite our hack on the namespace if it uses fully-qualified names.
349          * So, grovel through the transformed clause and make sure there are
350          * no bogus references.  (Outer references are OK, and are ignored
351          * here.)
352          */
353         clause_varnos = pull_varnos(result);
354         while ((varno = bms_first_member(clause_varnos)) >= 0)
355         {
356                 if (!list_member_int(containedRels, varno))
357                 {
358                         ereport(ERROR,
359                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
360                                          errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
361                                    rt_fetch(varno, pstate->p_rtable)->eref->aliasname)));
362                 }
363         }
364         bms_free(clause_varnos);
365
366         return result;
367 }
368
369 /*
370  * transformTableEntry --- transform a RangeVar (simple relation reference)
371  */
372 static RangeTblRef *
373 transformTableEntry(ParseState *pstate, RangeVar *r)
374 {
375         RangeTblEntry *rte;
376         RangeTblRef *rtr;
377
378         /*
379          * mark this entry to indicate it comes from the FROM clause. In SQL,
380          * the target list can only refer to range variables specified in the
381          * from clause but we follow the more powerful POSTQUEL semantics and
382          * automatically generate the range variable if not specified. However
383          * there are times we need to know whether the entries are legitimate.
384          */
385         rte = addRangeTableEntry(pstate, r, r->alias,
386                                                          interpretInhOption(r->inhOpt), true);
387
388         /*
389          * We create a RangeTblRef, but we do not add it to the joinlist or
390          * namespace; our caller must do that if appropriate.
391          */
392         rtr = makeNode(RangeTblRef);
393         /* assume new rte is at end */
394         rtr->rtindex = list_length(pstate->p_rtable);
395         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
396
397         return rtr;
398 }
399
400
401 /*
402  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
403  */
404 static RangeTblRef *
405 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
406 {
407         List       *parsetrees;
408         Query      *query;
409         RangeTblEntry *rte;
410         RangeTblRef *rtr;
411
412         /*
413          * We require user to supply an alias for a subselect, per SQL92. To
414          * relax this, we'd have to be prepared to gin up a unique alias for
415          * an unlabeled subselect.
416          */
417         if (r->alias == NULL)
418                 ereport(ERROR,
419                                 (errcode(ERRCODE_SYNTAX_ERROR),
420                                  errmsg("subquery in FROM must have an alias")));
421
422         /*
423          * Analyze and transform the subquery.
424          */
425         parsetrees = parse_sub_analyze(r->subquery, pstate);
426
427         /*
428          * Check that we got something reasonable.      Most of these conditions
429          * are probably impossible given restrictions of the grammar, but
430          * check 'em anyway.
431          */
432         if (list_length(parsetrees) != 1)
433                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
434         query = (Query *) linitial(parsetrees);
435         if (query == NULL || !IsA(query, Query))
436                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
437
438         if (query->commandType != CMD_SELECT)
439                 elog(ERROR, "expected SELECT query from subquery in FROM");
440         if (query->resultRelation != 0 || query->into != NULL)
441                 ereport(ERROR,
442                                 (errcode(ERRCODE_SYNTAX_ERROR),
443                                  errmsg("subquery in FROM may not have SELECT INTO")));
444
445         /*
446          * The subquery cannot make use of any variables from FROM items
447          * created earlier in the current query.  Per SQL92, the scope of a
448          * FROM item does not include other FROM items.  Formerly we hacked
449          * the namespace so that the other variables weren't even visible, but
450          * it seems more useful to leave them visible and give a specific
451          * error message.
452          *
453          * XXX this will need further work to support SQL99's LATERAL() feature,
454          * wherein such references would indeed be legal.
455          *
456          * We can skip groveling through the subquery if there's not anything
457          * visible in the current query.  Also note that outer references are
458          * OK.
459          */
460         if (pstate->p_namespace)
461         {
462                 if (contain_vars_of_level((Node *) query, 1))
463                         ereport(ERROR,
464                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
465                                          errmsg("subquery in FROM may not refer to other relations of same query level")));
466         }
467
468         /*
469          * OK, build an RTE for the subquery.
470          */
471         rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
472
473         /*
474          * We create a RangeTblRef, but we do not add it to the joinlist or
475          * namespace; our caller must do that if appropriate.
476          */
477         rtr = makeNode(RangeTblRef);
478         /* assume new rte is at end */
479         rtr->rtindex = list_length(pstate->p_rtable);
480         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
481
482         return rtr;
483 }
484
485
486 /*
487  * transformRangeFunction --- transform a function call appearing in FROM
488  */
489 static RangeTblRef *
490 transformRangeFunction(ParseState *pstate, RangeFunction *r)
491 {
492         Node       *funcexpr;
493         char       *funcname;
494         RangeTblEntry *rte;
495         RangeTblRef *rtr;
496
497         /* Get function name for possible use as alias */
498         Assert(IsA(r->funccallnode, FuncCall));
499         funcname = strVal(llast(((FuncCall *) r->funccallnode)->funcname));
500
501         /*
502          * Transform the raw FuncCall node.
503          */
504         funcexpr = transformExpr(pstate, r->funccallnode);
505
506         /*
507          * The function parameters cannot make use of any variables from other
508          * FROM items.  (Compare to transformRangeSubselect(); the coding is
509          * different though because we didn't parse as a sub-select with its
510          * own level of namespace.)
511          *
512          * XXX this will need further work to support SQL99's LATERAL() feature,
513          * wherein such references would indeed be legal.
514          */
515         if (pstate->p_namespace)
516         {
517                 if (contain_vars_of_level(funcexpr, 0))
518                         ereport(ERROR,
519                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
520                                          errmsg("function expression in FROM may not refer to other relations of same query level")));
521         }
522
523         /*
524          * Disallow aggregate functions in the expression.      (No reason to
525          * postpone this check until parseCheckAggregates.)
526          */
527         if (pstate->p_hasAggs)
528         {
529                 if (checkExprHasAggs(funcexpr))
530                         ereport(ERROR,
531                                         (errcode(ERRCODE_GROUPING_ERROR),
532                                          errmsg("cannot use aggregate function in function expression in FROM")));
533         }
534
535         /*
536          * If a coldeflist is supplied, ensure it defines a legal set of names
537          * (no duplicates) and datatypes (no pseudo-types, for instance).
538          */
539         if (r->coldeflist)
540         {
541                 TupleDesc       tupdesc;
542
543                 tupdesc = BuildDescForRelation(r->coldeflist);
544                 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
545         }
546
547         /*
548          * OK, build an RTE for the function.
549          */
550         rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
551                                                                                 r, true);
552
553         /*
554          * We create a RangeTblRef, but we do not add it to the joinlist or
555          * namespace; our caller must do that if appropriate.
556          */
557         rtr = makeNode(RangeTblRef);
558         /* assume new rte is at end */
559         rtr->rtindex = list_length(pstate->p_rtable);
560         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
561
562         return rtr;
563 }
564
565
566 /*
567  * transformFromClauseItem -
568  *        Transform a FROM-clause item, adding any required entries to the
569  *        range table list being built in the ParseState, and return the
570  *        transformed item ready to include in the joinlist and namespace.
571  *        This routine can recurse to handle SQL92 JOIN expressions.
572  *
573  *        Aside from the primary return value (the transformed joinlist item)
574  *        this routine also returns an integer list of the rangetable indexes
575  *        of all the base and join relations represented in the joinlist item.
576  *        This list is needed for checking JOIN/ON conditions in higher levels.
577  */
578 static Node *
579 transformFromClauseItem(ParseState *pstate, Node *n, List **containedRels)
580 {
581         if (IsA(n, RangeVar))
582         {
583                 /* Plain relation reference */
584                 RangeTblRef *rtr;
585
586                 rtr = transformTableEntry(pstate, (RangeVar *) n);
587                 *containedRels = list_make1_int(rtr->rtindex);
588                 return (Node *) rtr;
589         }
590         else if (IsA(n, RangeSubselect))
591         {
592                 /* sub-SELECT is like a plain relation */
593                 RangeTblRef *rtr;
594
595                 rtr = transformRangeSubselect(pstate, (RangeSubselect *) n);
596                 *containedRels = list_make1_int(rtr->rtindex);
597                 return (Node *) rtr;
598         }
599         else if (IsA(n, RangeFunction))
600         {
601                 /* function is like a plain relation */
602                 RangeTblRef *rtr;
603
604                 rtr = transformRangeFunction(pstate, (RangeFunction *) n);
605                 *containedRels = list_make1_int(rtr->rtindex);
606                 return (Node *) rtr;
607         }
608         else if (IsA(n, JoinExpr))
609         {
610                 /* A newfangled join expression */
611                 JoinExpr   *j = (JoinExpr *) n;
612                 List       *my_containedRels,
613                                    *l_containedRels,
614                                    *r_containedRels,
615                                    *l_colnames,
616                                    *r_colnames,
617                                    *res_colnames,
618                                    *l_colvars,
619                                    *r_colvars,
620                                    *res_colvars;
621                 Index           leftrti,
622                                         rightrti;
623                 RangeTblEntry *rte;
624
625                 /*
626                  * Recursively process the left and right subtrees
627                  */
628                 j->larg = transformFromClauseItem(pstate, j->larg, &l_containedRels);
629                 j->rarg = transformFromClauseItem(pstate, j->rarg, &r_containedRels);
630
631                 /*
632                  * Generate combined list of relation indexes for possible use by
633                  * transformJoinOnClause below.
634                  */
635                 my_containedRels = list_concat(l_containedRels, r_containedRels);
636
637                 /*
638                  * Check for conflicting refnames in left and right subtrees. Must
639                  * do this because higher levels will assume I hand back a self-
640                  * consistent namespace subtree.
641                  */
642                 checkNameSpaceConflicts(pstate, j->larg, j->rarg);
643
644                 /*
645                  * Extract column name and var lists from both subtrees
646                  *
647                  * Note: expandRTE returns new lists, safe for me to modify
648                  */
649                 if (IsA(j->larg, RangeTblRef))
650                         leftrti = ((RangeTblRef *) j->larg)->rtindex;
651                 else if (IsA(j->larg, JoinExpr))
652                         leftrti = ((JoinExpr *) j->larg)->rtindex;
653                 else
654                 {
655                         elog(ERROR, "unrecognized node type: %d", (int) nodeTag(j->larg));
656                         leftrti = 0;            /* keep compiler quiet */
657                 }
658                 rte = rt_fetch(leftrti, pstate->p_rtable);
659                 expandRTE(pstate, rte, &l_colnames, &l_colvars);
660
661                 if (IsA(j->rarg, RangeTblRef))
662                         rightrti = ((RangeTblRef *) j->rarg)->rtindex;
663                 else if (IsA(j->rarg, JoinExpr))
664                         rightrti = ((JoinExpr *) j->rarg)->rtindex;
665                 else
666                 {
667                         elog(ERROR, "unrecognized node type: %d", (int) nodeTag(j->rarg));
668                         rightrti = 0;           /* keep compiler quiet */
669                 }
670                 rte = rt_fetch(rightrti, pstate->p_rtable);
671                 expandRTE(pstate, rte, &r_colnames, &r_colvars);
672
673                 /*
674                  * Natural join does not explicitly specify columns; must generate
675                  * columns to join. Need to run through the list of columns from
676                  * each table or join result and match up the column names. Use
677                  * the first table, and check every column in the second table for
678                  * a match.  (We'll check that the matches were unique later on.)
679                  * The result of this step is a list of column names just like an
680                  * explicitly-written USING list.
681                  */
682                 if (j->isNatural)
683                 {
684                         List       *rlist = NIL;
685                         ListCell   *lx,
686                                            *rx;
687
688                         Assert(j->using == NIL);        /* shouldn't have USING() too */
689
690                         foreach(lx, l_colnames)
691                         {
692                                 char       *l_colname = strVal(lfirst(lx));
693                                 Value      *m_name = NULL;
694
695                                 foreach(rx, r_colnames)
696                                 {
697                                         char       *r_colname = strVal(lfirst(rx));
698
699                                         if (strcmp(l_colname, r_colname) == 0)
700                                         {
701                                                 m_name = makeString(l_colname);
702                                                 break;
703                                         }
704                                 }
705
706                                 /* matched a right column? then keep as join column... */
707                                 if (m_name != NULL)
708                                         rlist = lappend(rlist, m_name);
709                         }
710
711                         j->using = rlist;
712                 }
713
714                 /*
715                  * Now transform the join qualifications, if any.
716                  */
717                 res_colnames = NIL;
718                 res_colvars = NIL;
719
720                 if (j->using)
721                 {
722                         /*
723                          * JOIN/USING (or NATURAL JOIN, as transformed above).
724                          * Transform the list into an explicit ON-condition, and
725                          * generate a list of merged result columns.
726                          */
727                         List       *ucols = j->using;
728                         List       *l_usingvars = NIL;
729                         List       *r_usingvars = NIL;
730                         ListCell   *ucol;
731
732                         Assert(j->quals == NULL);       /* shouldn't have ON() too */
733
734                         foreach(ucol, ucols)
735                         {
736                                 char       *u_colname = strVal(lfirst(ucol));
737                                 ListCell   *col;
738                                 int                     ndx;
739                                 int                     l_index = -1;
740                                 int                     r_index = -1;
741                                 Var                *l_colvar,
742                                                    *r_colvar;
743
744                                 /* Check for USING(foo,foo) */
745                                 foreach(col, res_colnames)
746                                 {
747                                         char       *res_colname = strVal(lfirst(col));
748
749                                         if (strcmp(res_colname, u_colname) == 0)
750                                                 ereport(ERROR,
751                                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
752                                                                  errmsg("column name \"%s\" appears more than once in USING clause",
753                                                                                 u_colname)));
754                                 }
755
756                                 /* Find it in left input */
757                                 ndx = 0;
758                                 foreach(col, l_colnames)
759                                 {
760                                         char       *l_colname = strVal(lfirst(col));
761
762                                         if (strcmp(l_colname, u_colname) == 0)
763                                         {
764                                                 if (l_index >= 0)
765                                                         ereport(ERROR,
766                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
767                                                                          errmsg("common column name \"%s\" appears more than once in left table",
768                                                                                         u_colname)));
769                                                 l_index = ndx;
770                                         }
771                                         ndx++;
772                                 }
773                                 if (l_index < 0)
774                                         ereport(ERROR,
775                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
776                                                          errmsg("column \"%s\" specified in USING clause does not exist in left table",
777                                                                         u_colname)));
778
779                                 /* Find it in right input */
780                                 ndx = 0;
781                                 foreach(col, r_colnames)
782                                 {
783                                         char       *r_colname = strVal(lfirst(col));
784
785                                         if (strcmp(r_colname, u_colname) == 0)
786                                         {
787                                                 if (r_index >= 0)
788                                                         ereport(ERROR,
789                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
790                                                                          errmsg("common column name \"%s\" appears more than once in right table",
791                                                                                         u_colname)));
792                                                 r_index = ndx;
793                                         }
794                                         ndx++;
795                                 }
796                                 if (r_index < 0)
797                                         ereport(ERROR,
798                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
799                                                          errmsg("column \"%s\" specified in USING clause does not exist in right table",
800                                                                         u_colname)));
801
802                                 l_colvar = list_nth(l_colvars, l_index);
803                                 l_usingvars = lappend(l_usingvars, l_colvar);
804                                 r_colvar = list_nth(r_colvars, r_index);
805                                 r_usingvars = lappend(r_usingvars, r_colvar);
806
807                                 res_colnames = lappend(res_colnames, lfirst(ucol));
808                                 res_colvars = lappend(res_colvars,
809                                                                           buildMergedJoinVar(pstate,
810                                                                                                                  j->jointype,
811                                                                                                                  l_colvar,
812                                                                                                                  r_colvar));
813                         }
814
815                         j->quals = transformJoinUsingClause(pstate,
816                                                                                                 l_usingvars,
817                                                                                                 r_usingvars);
818                 }
819                 else if (j->quals)
820                 {
821                         /* User-written ON-condition; transform it */
822                         j->quals = transformJoinOnClause(pstate, j, my_containedRels);
823                 }
824                 else
825                 {
826                         /* CROSS JOIN: no quals */
827                 }
828
829                 /* Add remaining columns from each side to the output columns */
830                 extractRemainingColumns(res_colnames,
831                                                                 l_colnames, l_colvars,
832                                                                 &l_colnames, &l_colvars);
833                 extractRemainingColumns(res_colnames,
834                                                                 r_colnames, r_colvars,
835                                                                 &r_colnames, &r_colvars);
836                 res_colnames = list_concat(res_colnames, l_colnames);
837                 res_colvars = list_concat(res_colvars, l_colvars);
838                 res_colnames = list_concat(res_colnames, r_colnames);
839                 res_colvars = list_concat(res_colvars, r_colvars);
840
841                 /*
842                  * Check alias (AS clause), if any.
843                  */
844                 if (j->alias)
845                 {
846                         if (j->alias->colnames != NIL)
847                         {
848                                 if (list_length(j->alias->colnames) > list_length(res_colnames))
849                                         ereport(ERROR,
850                                                         (errcode(ERRCODE_SYNTAX_ERROR),
851                                                          errmsg("column alias list for \"%s\" has too many entries",
852                                                                         j->alias->aliasname)));
853                         }
854                 }
855
856                 /*
857                  * Now build an RTE for the result of the join
858                  */
859                 rte = addRangeTableEntryForJoin(pstate,
860                                                                                 res_colnames,
861                                                                                 j->jointype,
862                                                                                 res_colvars,
863                                                                                 j->alias,
864                                                                                 true);
865
866                 /* assume new rte is at end */
867                 j->rtindex = list_length(pstate->p_rtable);
868                 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
869
870                 /*
871                  * Include join RTE in returned containedRels list
872                  */
873                 *containedRels = lcons_int(j->rtindex, my_containedRels);
874
875                 return (Node *) j;
876         }
877         else
878                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
879         return NULL;                            /* can't get here, keep compiler quiet */
880 }
881
882 /*
883  * buildMergedJoinVar -
884  *        generate a suitable replacement expression for a merged join column
885  */
886 static Node *
887 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
888                                    Var *l_colvar, Var *r_colvar)
889 {
890         Oid                     outcoltype;
891         int32           outcoltypmod;
892         Node       *l_node,
893                            *r_node,
894                            *res_node;
895
896         /*
897          * Choose output type if input types are dissimilar.
898          */
899         outcoltype = l_colvar->vartype;
900         outcoltypmod = l_colvar->vartypmod;
901         if (outcoltype != r_colvar->vartype)
902         {
903                 outcoltype = select_common_type(list_make2_oid(l_colvar->vartype,
904                                                                                                            r_colvar->vartype),
905                                                                                 "JOIN/USING");
906                 outcoltypmod = -1;              /* ie, unknown */
907         }
908         else if (outcoltypmod != r_colvar->vartypmod)
909         {
910                 /* same type, but not same typmod */
911                 outcoltypmod = -1;              /* ie, unknown */
912         }
913
914         /*
915          * Insert coercion functions if needed.  Note that a difference in
916          * typmod can only happen if input has typmod but outcoltypmod is -1.
917          * In that case we insert a RelabelType to clearly mark that result's
918          * typmod is not same as input.  We never need coerce_type_typmod.
919          */
920         if (l_colvar->vartype != outcoltype)
921                 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
922                                                          outcoltype, outcoltypmod,
923                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
924         else if (l_colvar->vartypmod != outcoltypmod)
925                 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
926                                                                                   outcoltype, outcoltypmod,
927                                                                                   COERCE_IMPLICIT_CAST);
928         else
929                 l_node = (Node *) l_colvar;
930
931         if (r_colvar->vartype != outcoltype)
932                 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
933                                                          outcoltype, outcoltypmod,
934                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
935         else if (r_colvar->vartypmod != outcoltypmod)
936                 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
937                                                                                   outcoltype, outcoltypmod,
938                                                                                   COERCE_IMPLICIT_CAST);
939         else
940                 r_node = (Node *) r_colvar;
941
942         /*
943          * Choose what to emit
944          */
945         switch (jointype)
946         {
947                 case JOIN_INNER:
948
949                         /*
950                          * We can use either var; prefer non-coerced one if available.
951                          */
952                         if (IsA(l_node, Var))
953                                 res_node = l_node;
954                         else if (IsA(r_node, Var))
955                                 res_node = r_node;
956                         else
957                                 res_node = l_node;
958                         break;
959                 case JOIN_LEFT:
960                         /* Always use left var */
961                         res_node = l_node;
962                         break;
963                 case JOIN_RIGHT:
964                         /* Always use right var */
965                         res_node = r_node;
966                         break;
967                 case JOIN_FULL:
968                         {
969                                 /*
970                                  * Here we must build a COALESCE expression to ensure that
971                                  * the join output is non-null if either input is.
972                                  */
973                                 CoalesceExpr *c = makeNode(CoalesceExpr);
974
975                                 c->coalescetype = outcoltype;
976                                 c->args = list_make2(l_node, r_node);
977                                 res_node = (Node *) c;
978                                 break;
979                         }
980                 default:
981                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
982                         res_node = NULL;        /* keep compiler quiet */
983                         break;
984         }
985
986         return res_node;
987 }
988
989
990 /*
991  * transformWhereClause -
992  *        Transform the qualification and make sure it is of type boolean.
993  *        Used for WHERE and allied clauses.
994  *
995  * constructName does not affect the semantics, but is used in error messages
996  */
997 Node *
998 transformWhereClause(ParseState *pstate, Node *clause,
999                                          const char *constructName)
1000 {
1001         Node       *qual;
1002
1003         if (clause == NULL)
1004                 return NULL;
1005
1006         qual = transformExpr(pstate, clause);
1007
1008         qual = coerce_to_boolean(pstate, qual, constructName);
1009
1010         return qual;
1011 }
1012
1013
1014 /*
1015  * transformLimitClause -
1016  *        Transform the expression and make sure it is of type integer.
1017  *        Used for LIMIT and allied clauses.
1018  *
1019  * constructName does not affect the semantics, but is used in error messages
1020  */
1021 Node *
1022 transformLimitClause(ParseState *pstate, Node *clause,
1023                                          const char *constructName)
1024 {
1025         Node       *qual;
1026
1027         if (clause == NULL)
1028                 return NULL;
1029
1030         qual = transformExpr(pstate, clause);
1031
1032         qual = coerce_to_integer(pstate, qual, constructName);
1033
1034         /*
1035          * LIMIT can't refer to any vars or aggregates of the current query;
1036          * we don't allow subselects either (though that case would at least
1037          * be sensible)
1038          */
1039         if (contain_vars_of_level(qual, 0))
1040         {
1041                 ereport(ERROR,
1042                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1043                 /* translator: %s is name of a SQL construct, eg LIMIT */
1044                                  errmsg("argument of %s must not contain variables",
1045                                                 constructName)));
1046         }
1047         if (checkExprHasAggs(qual))
1048         {
1049                 ereport(ERROR,
1050                                 (errcode(ERRCODE_GROUPING_ERROR),
1051                 /* translator: %s is name of a SQL construct, eg LIMIT */
1052                                  errmsg("argument of %s must not contain aggregates",
1053                                                 constructName)));
1054         }
1055         if (contain_subplans(qual))
1056         {
1057                 ereport(ERROR,
1058                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1059                 /* translator: %s is name of a SQL construct, eg LIMIT */
1060                                  errmsg("argument of %s must not contain subqueries",
1061                                                 constructName)));
1062         }
1063
1064         return qual;
1065 }
1066
1067
1068 /*
1069  *      findTargetlistEntry -
1070  *        Returns the targetlist entry matching the given (untransformed) node.
1071  *        If no matching entry exists, one is created and appended to the target
1072  *        list as a "resjunk" node.
1073  *
1074  * node         the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1075  * tlist        the target list (passed by reference so we can append to it)
1076  * clause       identifies clause type being processed
1077  */
1078 static TargetEntry *
1079 findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
1080 {
1081         TargetEntry *target_result = NULL;
1082         ListCell   *tl;
1083         Node       *expr;
1084
1085         /*----------
1086          * Handle two special cases as mandated by the SQL92 spec:
1087          *
1088          * 1. Bare ColumnName (no qualifier or subscripts)
1089          *        For a bare identifier, we search for a matching column name
1090          *        in the existing target list.  Multiple matches are an error
1091          *        unless they refer to identical values; for example,
1092          *        we allow      SELECT a, a FROM table ORDER BY a
1093          *        but not       SELECT a AS b, b FROM table ORDER BY b
1094          *        If no match is found, we fall through and treat the identifier
1095          *        as an expression.
1096          *        For GROUP BY, it is incorrect to match the grouping item against
1097          *        targetlist entries: according to SQL92, an identifier in GROUP BY
1098          *        is a reference to a column name exposed by FROM, not to a target
1099          *        list column.  However, many implementations (including pre-7.0
1100          *        PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1101          *        to see if the identifier matches any FROM column name, and only
1102          *        try for a targetlist name if it doesn't.  This ensures that we
1103          *        adhere to the spec in the case where the name could be both.
1104          *        DISTINCT ON isn't in the standard, so we can do what we like there;
1105          *        we choose to make it work like ORDER BY, on the rather flimsy
1106          *        grounds that ordinary DISTINCT works on targetlist entries.
1107          *
1108          * 2. IntegerConstant
1109          *        This means to use the n'th item in the existing target list.
1110          *        Note that it would make no sense to order/group/distinct by an
1111          *        actual constant, so this does not create a conflict with our
1112          *        extension to order/group by an expression.
1113          *        GROUP BY column-number is not allowed by SQL92, but since
1114          *        the standard has no other behavior defined for this syntax,
1115          *        we may as well accept this common extension.
1116          *
1117          * Note that pre-existing resjunk targets must not be used in either case,
1118          * since the user didn't write them in his SELECT list.
1119          *
1120          * If neither special case applies, fall through to treat the item as
1121          * an expression.
1122          *----------
1123          */
1124         if (IsA(node, ColumnRef) &&
1125                 list_length(((ColumnRef *) node)->fields) == 1)
1126         {
1127                 char       *name = strVal(linitial(((ColumnRef *) node)->fields));
1128
1129                 if (clause == GROUP_CLAUSE)
1130                 {
1131                         /*
1132                          * In GROUP BY, we must prefer a match against a FROM-clause
1133                          * column to one against the targetlist.  Look to see if there
1134                          * is a matching column.  If so, fall through to let
1135                          * transformExpr() do the rest.  NOTE: if name could refer
1136                          * ambiguously to more than one column name exposed by FROM,
1137                          * colNameToVar will ereport(ERROR).  That's just what we want
1138                          * here.
1139                          *
1140                          * Small tweak for 7.4.3: ignore matches in upper query levels.
1141                          * This effectively changes the search order for bare names to
1142                          * (1) local FROM variables, (2) local targetlist aliases,
1143                          * (3) outer FROM variables, whereas before it was (1) (3) (2).
1144                          * SQL92 and SQL99 do not allow GROUPing BY an outer reference,
1145                          * so this breaks no cases that are legal per spec, and it
1146                          * seems a more self-consistent behavior.
1147                          */
1148                         if (colNameToVar(pstate, name, true) != NULL)
1149                                 name = NULL;
1150                 }
1151
1152                 if (name != NULL)
1153                 {
1154                         foreach(tl, *tlist)
1155                         {
1156                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1157                                 Resdom     *resnode = tle->resdom;
1158
1159                                 if (!resnode->resjunk &&
1160                                         strcmp(resnode->resname, name) == 0)
1161                                 {
1162                                         if (target_result != NULL)
1163                                         {
1164                                                 if (!equal(target_result->expr, tle->expr))
1165                                                         ereport(ERROR,
1166                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1167                                                                          /* translator: first %s is name of a SQL construct, eg ORDER BY */
1168                                                                          errmsg("%s \"%s\" is ambiguous",
1169                                                                                         clauseText[clause], name)));
1170                                         }
1171                                         else
1172                                                 target_result = tle;
1173                                         /* Stay in loop to check for ambiguity */
1174                                 }
1175                         }
1176                         if (target_result != NULL)
1177                                 return target_result;   /* return the first match */
1178                 }
1179         }
1180         if (IsA(node, A_Const))
1181         {
1182                 Value      *val = &((A_Const *) node)->val;
1183                 int                     targetlist_pos = 0;
1184                 int                     target_pos;
1185
1186                 if (!IsA(val, Integer))
1187                         ereport(ERROR,
1188                                         (errcode(ERRCODE_SYNTAX_ERROR),
1189                         /* translator: %s is name of a SQL construct, eg ORDER BY */
1190                                          errmsg("non-integer constant in %s",
1191                                                         clauseText[clause])));
1192                 target_pos = intVal(val);
1193                 foreach(tl, *tlist)
1194                 {
1195                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1196                         Resdom     *resnode = tle->resdom;
1197
1198                         if (!resnode->resjunk)
1199                         {
1200                                 if (++targetlist_pos == target_pos)
1201                                         return tle; /* return the unique match */
1202                         }
1203                 }
1204                 ereport(ERROR,
1205                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1206                 /* translator: %s is name of a SQL construct, eg ORDER BY */
1207                                  errmsg("%s position %d is not in select list",
1208                                                 clauseText[clause], target_pos)));
1209         }
1210
1211         /*
1212          * Otherwise, we have an expression (this is a Postgres extension not
1213          * found in SQL92).  Convert the untransformed node to a transformed
1214          * expression, and search for a match in the tlist. NOTE: it doesn't
1215          * really matter whether there is more than one match.  Also, we are
1216          * willing to match a resjunk target here, though the above cases must
1217          * ignore resjunk targets.
1218          */
1219         expr = transformExpr(pstate, node);
1220
1221         foreach(tl, *tlist)
1222         {
1223                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1224
1225                 if (equal(expr, tle->expr))
1226                         return tle;
1227         }
1228
1229         /*
1230          * If no matches, construct a new target entry which is appended to
1231          * the end of the target list.  This target is given resjunk = TRUE so
1232          * that it will not be projected into the final tuple.
1233          */
1234         target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1235
1236         *tlist = lappend(*tlist, target_result);
1237
1238         return target_result;
1239 }
1240
1241
1242 /*
1243  * transformGroupClause -
1244  *        transform a GROUP BY clause
1245  *
1246  * GROUP BY items will be added to the targetlist (as resjunk columns)
1247  * if not already present, so the targetlist must be passed by reference.
1248  */
1249 List *
1250 transformGroupClause(ParseState *pstate, List *grouplist,
1251                                          List **targetlist, List *sortClause)
1252 {
1253         List       *glist = NIL;
1254         ListCell   *gl;
1255         ListCell   *sortItem;
1256
1257         sortItem = list_head(sortClause);
1258
1259         foreach(gl, grouplist)
1260         {
1261                 TargetEntry *tle;
1262                 Oid                     restype;
1263                 Oid                     ordering_op;
1264                 GroupClause *grpcl;
1265
1266                 tle = findTargetlistEntry(pstate, lfirst(gl),
1267                                                                   targetlist, GROUP_CLAUSE);
1268
1269                 /* avoid making duplicate grouplist entries */
1270                 if (targetIsInSortList(tle, glist))
1271                         continue;
1272
1273                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1274                 restype = tle->resdom->restype;
1275
1276                 if (restype == UNKNOWNOID)
1277                 {
1278                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1279                                                                                          restype, TEXTOID, -1,
1280                                                                                          COERCION_IMPLICIT,
1281                                                                                          COERCE_IMPLICIT_CAST);
1282                         restype = tle->resdom->restype = TEXTOID;
1283                         tle->resdom->restypmod = -1;
1284                 }
1285
1286                 /*
1287                  * If the GROUP BY clause matches the ORDER BY clause, we want to
1288                  * adopt the ordering operators from the latter rather than using
1289                  * the default ops.  This allows "GROUP BY foo ORDER BY foo DESC"
1290                  * to be done with only one sort step.  Note we are assuming that
1291                  * any user-supplied ordering operator will bring equal values
1292                  * together, which is all that GROUP BY needs.
1293                  */
1294                 if (sortItem &&
1295                         ((SortClause *) lfirst(sortItem))->tleSortGroupRef ==
1296                         tle->resdom->ressortgroupref)
1297                 {
1298                         ordering_op = ((SortClause *) lfirst(sortItem))->sortop;
1299                         sortItem = lnext(sortItem);
1300                 }
1301                 else
1302                 {
1303                         ordering_op = ordering_oper_opid(restype);
1304                         sortItem = NULL;        /* disregard ORDER BY once match fails */
1305                 }
1306
1307                 grpcl = makeNode(GroupClause);
1308                 grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist);
1309                 grpcl->sortop = ordering_op;
1310                 glist = lappend(glist, grpcl);
1311         }
1312
1313         return glist;
1314 }
1315
1316 /*
1317  * transformSortClause -
1318  *        transform an ORDER BY clause
1319  *
1320  * ORDER BY items will be added to the targetlist (as resjunk columns)
1321  * if not already present, so the targetlist must be passed by reference.
1322  */
1323 List *
1324 transformSortClause(ParseState *pstate,
1325                                         List *orderlist,
1326                                         List **targetlist,
1327                                         bool resolveUnknown)
1328 {
1329         List       *sortlist = NIL;
1330         ListCell   *olitem;
1331
1332         foreach(olitem, orderlist)
1333         {
1334                 SortBy     *sortby = lfirst(olitem);
1335                 TargetEntry *tle;
1336
1337                 tle = findTargetlistEntry(pstate, sortby->node,
1338                                                                   targetlist, ORDER_CLAUSE);
1339
1340                 sortlist = addTargetToSortList(pstate, tle,
1341                                                                            sortlist, *targetlist,
1342                                                                            sortby->sortby_kind,
1343                                                                            sortby->useOp,
1344                                                                            resolveUnknown);
1345         }
1346
1347         return sortlist;
1348 }
1349
1350 /*
1351  * transformDistinctClause -
1352  *        transform a DISTINCT or DISTINCT ON clause
1353  *
1354  * Since we may need to add items to the query's sortClause list, that list
1355  * is passed by reference.      Likewise for the targetlist.
1356  */
1357 List *
1358 transformDistinctClause(ParseState *pstate, List *distinctlist,
1359                                                 List **targetlist, List **sortClause)
1360 {
1361         List       *result = NIL;
1362         ListCell   *slitem;
1363         ListCell   *dlitem;
1364
1365         /* No work if there was no DISTINCT clause */
1366         if (distinctlist == NIL)
1367                 return NIL;
1368
1369         if (linitial(distinctlist) == NULL)
1370         {
1371                 /* We had SELECT DISTINCT */
1372
1373                 /*
1374                  * All non-resjunk elements from target list that are not already
1375                  * in the sort list should be added to it.      (We don't really care
1376                  * what order the DISTINCT fields are checked in, so we can leave
1377                  * the user's ORDER BY spec alone, and just add additional sort
1378                  * keys to it to ensure that all targetlist items get sorted.)
1379                  */
1380                 *sortClause = addAllTargetsToSortList(pstate,
1381                                                                                           *sortClause,
1382                                                                                           *targetlist,
1383                                                                                           true);
1384
1385                 /*
1386                  * Now, DISTINCT list consists of all non-resjunk sortlist items.
1387                  * Actually, all the sortlist items had better be non-resjunk!
1388                  * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item
1389                  * that does not appear anywhere in the SELECT targetlist, and we
1390                  * can't implement that with only one sorting pass...
1391                  */
1392                 foreach(slitem, *sortClause)
1393                 {
1394                         SortClause *scl = (SortClause *) lfirst(slitem);
1395                         TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
1396
1397                         if (tle->resdom->resjunk)
1398                                 ereport(ERROR,
1399                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1400                                                  errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
1401                         else
1402                                 result = lappend(result, copyObject(scl));
1403                 }
1404         }
1405         else
1406         {
1407                 /* We had SELECT DISTINCT ON (expr, ...) */
1408
1409                 /*
1410                  * If the user writes both DISTINCT ON and ORDER BY, then the two
1411                  * expression lists must match (until one or the other runs out).
1412                  * Otherwise the ORDER BY requires a different sort order than the
1413                  * DISTINCT does, and we can't implement that with only one sort
1414                  * pass (and if we do two passes, the results will be rather
1415                  * unpredictable). However, it's OK to have more DISTINCT ON
1416                  * expressions than ORDER BY expressions; we can just add the
1417                  * extra DISTINCT values to the sort list, much as we did above
1418                  * for ordinary DISTINCT fields.
1419                  *
1420                  * Actually, it'd be OK for the common prefixes of the two lists to
1421                  * match in any order, but implementing that check seems like more
1422                  * trouble than it's worth.
1423                  */
1424                 ListCell   *nextsortlist = list_head(*sortClause);
1425
1426                 foreach(dlitem, distinctlist)
1427                 {
1428                         TargetEntry *tle;
1429
1430                         tle = findTargetlistEntry(pstate, lfirst(dlitem),
1431                                                                           targetlist, DISTINCT_ON_CLAUSE);
1432
1433                         if (nextsortlist != NULL)
1434                         {
1435                                 SortClause *scl = (SortClause *) lfirst(nextsortlist);
1436
1437                                 if (tle->resdom->ressortgroupref != scl->tleSortGroupRef)
1438                                         ereport(ERROR,
1439                                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1440                                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1441                                 result = lappend(result, copyObject(scl));
1442                                 nextsortlist = lnext(nextsortlist);
1443                         }
1444                         else
1445                         {
1446                                 *sortClause = addTargetToSortList(pstate, tle,
1447                                                                                                   *sortClause, *targetlist,
1448                                                                                                   SORTBY_ASC, NIL, true);
1449
1450                                 /*
1451                                  * Probably, the tle should always have been added at the
1452                                  * end of the sort list ... but search to be safe.
1453                                  */
1454                                 foreach(slitem, *sortClause)
1455                                 {
1456                                         SortClause *scl = (SortClause *) lfirst(slitem);
1457
1458                                         if (tle->resdom->ressortgroupref == scl->tleSortGroupRef)
1459                                         {
1460                                                 result = lappend(result, copyObject(scl));
1461                                                 break;
1462                                         }
1463                                 }
1464                                 if (slitem == NULL)             /* should not happen */
1465                                         elog(ERROR, "failed to add DISTINCT ON clause to target list");
1466                         }
1467                 }
1468         }
1469
1470         return result;
1471 }
1472
1473 /*
1474  * addAllTargetsToSortList
1475  *              Make sure all non-resjunk targets in the targetlist are in the
1476  *              ORDER BY list, adding the not-yet-sorted ones to the end of the list.
1477  *              This is typically used to help implement SELECT DISTINCT.
1478  *
1479  * See addTargetToSortList for info about pstate and resolveUnknown inputs.
1480  *
1481  * Returns the updated ORDER BY list.
1482  */
1483 List *
1484 addAllTargetsToSortList(ParseState *pstate, List *sortlist,
1485                                                 List *targetlist, bool resolveUnknown)
1486 {
1487         ListCell  *l;
1488
1489         foreach(l, targetlist)
1490         {
1491                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1492
1493                 if (!tle->resdom->resjunk)
1494                         sortlist = addTargetToSortList(pstate, tle,
1495                                                                                    sortlist, targetlist,
1496                                                                                    SORTBY_ASC, NIL,
1497                                                                                    resolveUnknown);
1498         }
1499         return sortlist;
1500 }
1501
1502 /*
1503  * addTargetToSortList
1504  *              If the given targetlist entry isn't already in the ORDER BY list,
1505  *              add it to the end of the list, using the sortop with given name
1506  *              or the default sort operator if opname == NIL.
1507  *
1508  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1509  * do nothing (which implies the search for a sort operator will fail).
1510  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1511  * otherwise.
1512  *
1513  * Returns the updated ORDER BY list.
1514  */
1515 List *
1516 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1517                                         List *sortlist, List *targetlist,
1518                                         int sortby_kind, List *sortby_opname,
1519                                         bool resolveUnknown)
1520 {
1521         /* avoid making duplicate sortlist entries */
1522         if (!targetIsInSortList(tle, sortlist))
1523         {
1524                 SortClause *sortcl = makeNode(SortClause);
1525                 Oid                     restype = tle->resdom->restype;
1526
1527                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1528                 if (restype == UNKNOWNOID && resolveUnknown)
1529                 {
1530                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1531                                                                                          restype, TEXTOID, -1,
1532                                                                                          COERCION_IMPLICIT,
1533                                                                                          COERCE_IMPLICIT_CAST);
1534                         restype = tle->resdom->restype = TEXTOID;
1535                         tle->resdom->restypmod = -1;
1536                 }
1537
1538                 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1539
1540                 switch (sortby_kind)
1541                 {
1542                         case SORTBY_ASC:
1543                                 sortcl->sortop = ordering_oper_opid(restype);
1544                                 break;
1545                         case SORTBY_DESC:
1546                                 sortcl->sortop = reverse_ordering_oper_opid(restype);
1547                                 break;
1548                         case SORTBY_USING:
1549                                 Assert(sortby_opname != NIL);
1550                                 sortcl->sortop = compatible_oper_opid(sortby_opname,
1551                                                                                                           restype,
1552                                                                                                           restype,
1553                                                                                                           false);
1554                                 break;
1555                         default:
1556                                 elog(ERROR, "unrecognized sortby_kind: %d", sortby_kind);
1557                                 break;
1558                 }
1559
1560                 sortlist = lappend(sortlist, sortcl);
1561         }
1562         return sortlist;
1563 }
1564
1565 /*
1566  * assignSortGroupRef
1567  *        Assign the targetentry an unused ressortgroupref, if it doesn't
1568  *        already have one.  Return the assigned or pre-existing refnumber.
1569  *
1570  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1571  */
1572 Index
1573 assignSortGroupRef(TargetEntry *tle, List *tlist)
1574 {
1575         Index           maxRef;
1576         ListCell   *l;
1577
1578         if (tle->resdom->ressortgroupref)       /* already has one? */
1579                 return tle->resdom->ressortgroupref;
1580
1581         /* easiest way to pick an unused refnumber: max used + 1 */
1582         maxRef = 0;
1583         foreach(l, tlist)
1584         {
1585                 Index           ref = ((TargetEntry *) lfirst(l))->resdom->ressortgroupref;
1586
1587                 if (ref > maxRef)
1588                         maxRef = ref;
1589         }
1590         tle->resdom->ressortgroupref = maxRef + 1;
1591         return tle->resdom->ressortgroupref;
1592 }
1593
1594 /*
1595  * targetIsInSortList
1596  *              Is the given target item already in the sortlist?
1597  *
1598  * Works for both SortClause and GroupClause lists.  Note that the main
1599  * reason we need this routine (and not just a quick test for nonzeroness
1600  * of ressortgroupref) is that a TLE might be in only one of the lists.
1601  */
1602 bool
1603 targetIsInSortList(TargetEntry *tle, List *sortList)
1604 {
1605         Index           ref = tle->resdom->ressortgroupref;
1606         ListCell   *l;
1607
1608         /* no need to scan list if tle has no marker */
1609         if (ref == 0)
1610                 return false;
1611
1612         foreach(l, sortList)
1613         {
1614                 SortClause *scl = (SortClause *) lfirst(l);
1615
1616                 if (scl->tleSortGroupRef == ref)
1617                         return true;
1618         }
1619         return false;
1620 }