]> granicus.if.org Git - postgresql/blob - src/backend/parser/parse_clause.c
When a GUC string variable is not set, print the empty string (in SHOW etc.),
[postgresql] / src / backend / parser / parse_clause.c
1 /*-------------------------------------------------------------------------
2  *
3  * parse_clause.c
4  *        handle clauses in parser
5  *
6  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.154 2006/07/26 00:34:48 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #include "access/heapam.h"
19 #include "catalog/heap.h"
20 #include "catalog/pg_type.h"
21 #include "commands/defrem.h"
22 #include "nodes/makefuncs.h"
23 #include "optimizer/clauses.h"
24 #include "optimizer/tlist.h"
25 #include "optimizer/var.h"
26 #include "parser/analyze.h"
27 #include "parser/parsetree.h"
28 #include "parser/parse_clause.h"
29 #include "parser/parse_coerce.h"
30 #include "parser/parse_expr.h"
31 #include "parser/parse_oper.h"
32 #include "parser/parse_relation.h"
33 #include "parser/parse_target.h"
34 #include "rewrite/rewriteManip.h"
35 #include "utils/guc.h"
36
37
/*
 * Clause-kind codes.  Each value is also the position of the matching
 * SQL keyword text in clauseText[] below, so the two must stay in sync.
 */
#define ORDER_CLAUSE		0
#define GROUP_CLAUSE		1
#define DISTINCT_ON_CLAUSE	2

/* Printable clause names, indexed by the *_CLAUSE codes above */
static char *clauseText[] = {
	"ORDER BY",
	"GROUP BY",
	"DISTINCT ON"
};
43
44 static void extractRemainingColumns(List *common_colnames,
45                                                 List *src_colnames, List *src_colvars,
46                                                 List **res_colnames, List **res_colvars);
47 static Node *transformJoinUsingClause(ParseState *pstate,
48                                                  List *leftVars, List *rightVars);
49 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
50                                           RangeTblEntry *l_rte,
51                                           RangeTblEntry *r_rte,
52                                           List *relnamespace,
53                                           Relids containedRels);
54 static RangeTblEntry *transformTableEntry(ParseState *pstate, RangeVar *r);
55 static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
56                                                 RangeSubselect *r);
57 static RangeTblEntry *transformRangeFunction(ParseState *pstate,
58                                            RangeFunction *r);
59 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
60                                                 RangeTblEntry **top_rte, int *top_rti,
61                                                 List **relnamespace,
62                                                 Relids *containedRels);
63 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
64                                    Var *l_colvar, Var *r_colvar);
65 static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
66                                         List **tlist, int clause);
67
68
69 /*
70  * transformFromClause -
71  *        Process the FROM clause and add items to the query's range table,
72  *        joinlist, and namespaces.
73  *
74  * Note: we assume that pstate's p_rtable, p_joinlist, p_relnamespace, and
75  * p_varnamespace lists were initialized to NIL when the pstate was created.
76  * We will add onto any entries already present --- this is needed for rule
77  * processing, as well as for UPDATE and DELETE.
78  *
79  * The range table may grow still further when we transform the expressions
80  * in the query's quals and target list. (This is possible because in
81  * POSTQUEL, we allowed references to relations not specified in the
82  * from-clause.  PostgreSQL keeps this extension to standard SQL.)
83  */
84 void
85 transformFromClause(ParseState *pstate, List *frmList)
86 {
87         ListCell   *fl;
88
89         /*
90          * The grammar will have produced a list of RangeVars, RangeSubselects,
91          * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding
92          * entries to the rtable), check for duplicate refnames, and then add it
93          * to the joinlist and namespaces.
94          */
95         foreach(fl, frmList)
96         {
97                 Node       *n = lfirst(fl);
98                 RangeTblEntry *rte;
99                 int                     rtindex;
100                 List       *relnamespace;
101                 Relids          containedRels;
102
103                 n = transformFromClauseItem(pstate, n,
104                                                                         &rte,
105                                                                         &rtindex,
106                                                                         &relnamespace,
107                                                                         &containedRels);
108                 checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
109                 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
110                 pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
111                                                                                          relnamespace);
112                 pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
113                 bms_free(containedRels);
114         }
115 }
116
117 /*
118  * setTargetTable
119  *        Add the target relation of INSERT/UPDATE/DELETE to the range table,
120  *        and make the special links to it in the ParseState.
121  *
122  *        We also open the target relation and acquire a write lock on it.
123  *        This must be done before processing the FROM list, in case the target
124  *        is also mentioned as a source relation --- we want to be sure to grab
125  *        the write lock before any read lock.
126  *
127  *        If alsoSource is true, add the target to the query's joinlist and
128  *        namespace.  For INSERT, we don't want the target to be joined to;
129  *        it's a destination of tuples, not a source.   For UPDATE/DELETE,
130  *        we do need to scan or join the target.  (NOTE: we do not bother
131  *        to check for namespace conflict; we assume that the namespace was
132  *        initially empty in these cases.)
133  *
134  *        Finally, we mark the relation as requiring the permissions specified
135  *        by requiredPerms.
136  *
137  *        Returns the rangetable index of the target relation.
138  */
139 int
140 setTargetTable(ParseState *pstate, RangeVar *relation,
141                            bool inh, bool alsoSource, AclMode requiredPerms)
142 {
143         RangeTblEntry *rte;
144         int                     rtindex;
145
146         /* Close old target; this could only happen for multi-action rules */
147         if (pstate->p_target_relation != NULL)
148                 heap_close(pstate->p_target_relation, NoLock);
149
150         /*
151          * Open target rel and grab suitable lock (which we will hold till end of
152          * transaction).
153          *
154          * analyze.c will eventually do the corresponding heap_close(), but *not*
155          * release the lock.
156          */
157         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
158
159         /*
160          * Now build an RTE.
161          */
162         rte = addRangeTableEntryForRelation(pstate, pstate->p_target_relation,
163                                                                                 relation->alias, inh, false);
164         pstate->p_target_rangetblentry = rte;
165
166         /* assume new rte is at end */
167         rtindex = list_length(pstate->p_rtable);
168         Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
169
170         /*
171          * Override addRangeTableEntry's default ACL_SELECT permissions check, and
172          * instead mark target table as requiring exactly the specified
173          * permissions.
174          *
175          * If we find an explicit reference to the rel later during parse
176          * analysis, scanRTEForColumn will add the ACL_SELECT bit back again. That
177          * can't happen for INSERT but it is possible for UPDATE and DELETE.
178          */
179         rte->requiredPerms = requiredPerms;
180
181         /*
182          * If UPDATE/DELETE, add table to joinlist and namespaces.
183          */
184         if (alsoSource)
185                 addRTEtoQuery(pstate, rte, true, true, true);
186
187         return rtindex;
188 }
189
190 /*
191  * Simplify InhOption (yes/no/default) into boolean yes/no.
192  *
193  * The reason we do things this way is that we don't want to examine the
194  * SQL_inheritance option flag until parse_analyze is run.      Otherwise,
195  * we'd do the wrong thing with query strings that intermix SET commands
196  * with queries.
197  */
198 bool
199 interpretInhOption(InhOption inhOpt)
200 {
201         switch (inhOpt)
202         {
203                 case INH_NO:
204                         return false;
205                 case INH_YES:
206                         return true;
207                 case INH_DEFAULT:
208                         return SQL_inheritance;
209         }
210         elog(ERROR, "bogus InhOption value: %d", inhOpt);
211         return false;                           /* keep compiler quiet */
212 }
213
214 /*
215  * Given a relation-options list (of DefElems), return true iff the specified
216  * table/result set should be created with OIDs. This needs to be done after
217  * parsing the query string because the return value can depend upon the
218  * default_with_oids GUC var.
219  */
220 bool
221 interpretOidsOption(List *defList)
222 {
223         ListCell   *cell;
224
225         /* Scan list to see if OIDS was included */
226         foreach(cell, defList)
227         {
228                 DefElem    *def = (DefElem *) lfirst(cell);
229
230                 if (pg_strcasecmp(def->defname, "oids") == 0)
231                         return defGetBoolean(def);
232         }
233
234         /* OIDS option was not specified, so use default. */
235         return default_with_oids;
236 }
237
238 /*
239  * Extract all not-in-common columns from column lists of a source table
240  */
241 static void
242 extractRemainingColumns(List *common_colnames,
243                                                 List *src_colnames, List *src_colvars,
244                                                 List **res_colnames, List **res_colvars)
245 {
246         List       *new_colnames = NIL;
247         List       *new_colvars = NIL;
248         ListCell   *lnames,
249                            *lvars;
250
251         Assert(list_length(src_colnames) == list_length(src_colvars));
252
253         forboth(lnames, src_colnames, lvars, src_colvars)
254         {
255                 char       *colname = strVal(lfirst(lnames));
256                 bool            match = false;
257                 ListCell   *cnames;
258
259                 foreach(cnames, common_colnames)
260                 {
261                         char       *ccolname = strVal(lfirst(cnames));
262
263                         if (strcmp(colname, ccolname) == 0)
264                         {
265                                 match = true;
266                                 break;
267                         }
268                 }
269
270                 if (!match)
271                 {
272                         new_colnames = lappend(new_colnames, lfirst(lnames));
273                         new_colvars = lappend(new_colvars, lfirst(lvars));
274                 }
275         }
276
277         *res_colnames = new_colnames;
278         *res_colvars = new_colvars;
279 }
280
281 /* transformJoinUsingClause()
282  *        Build a complete ON clause from a partially-transformed USING list.
283  *        We are given lists of nodes representing left and right match columns.
284  *        Result is a transformed qualification expression.
285  */
286 static Node *
287 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
288 {
289         Node       *result = NULL;
290         ListCell   *lvars,
291                            *rvars;
292
293         /*
294          * We cheat a little bit here by building an untransformed operator tree
295          * whose leaves are the already-transformed Vars.  This is OK because
296          * transformExpr() won't complain about already-transformed subnodes.
297          */
298         forboth(lvars, leftVars, rvars, rightVars)
299         {
300                 Node       *lvar = (Node *) lfirst(lvars);
301                 Node       *rvar = (Node *) lfirst(rvars);
302                 A_Expr     *e;
303
304                 e = makeSimpleA_Expr(AEXPR_OP, "=",
305                                                          copyObject(lvar), copyObject(rvar),
306                                                          -1);
307
308                 if (result == NULL)
309                         result = (Node *) e;
310                 else
311                 {
312                         A_Expr     *a;
313
314                         a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e, -1);
315                         result = (Node *) a;
316                 }
317         }
318
319         /*
320          * Since the references are already Vars, and are certainly from the input
321          * relations, we don't have to go through the same pushups that
322          * transformJoinOnClause() does.  Just invoke transformExpr() to fix up
323          * the operators, and we're done.
324          */
325         result = transformExpr(pstate, result);
326
327         result = coerce_to_boolean(pstate, result, "JOIN/USING");
328
329         return result;
330 }
331
332 /* transformJoinOnClause()
333  *        Transform the qual conditions for JOIN/ON.
334  *        Result is a transformed qualification expression.
335  */
336 static Node *
337 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
338                                           RangeTblEntry *l_rte,
339                                           RangeTblEntry *r_rte,
340                                           List *relnamespace,
341                                           Relids containedRels)
342 {
343         Node       *result;
344         List       *save_relnamespace;
345         List       *save_varnamespace;
346         Relids          clause_varnos;
347         int                     varno;
348
349         /*
350          * This is a tad tricky, for two reasons.  First, the namespace that the
351          * join expression should see is just the two subtrees of the JOIN plus
352          * any outer references from upper pstate levels.  So, temporarily set
353          * this pstate's namespace accordingly.  (We need not check for refname
354          * conflicts, because transformFromClauseItem() already did.) NOTE: this
355          * code is OK only because the ON clause can't legally alter the namespace
356          * by causing implicit relation refs to be added.
357          */
358         save_relnamespace = pstate->p_relnamespace;
359         save_varnamespace = pstate->p_varnamespace;
360
361         pstate->p_relnamespace = relnamespace;
362         pstate->p_varnamespace = list_make2(l_rte, r_rte);
363
364         result = transformWhereClause(pstate, j->quals, "JOIN/ON");
365
366         pstate->p_relnamespace = save_relnamespace;
367         pstate->p_varnamespace = save_varnamespace;
368
369         /*
370          * Second, we need to check that the ON condition doesn't refer to any
371          * rels outside the input subtrees of the JOIN.  It could do that despite
372          * our hack on the namespace if it uses fully-qualified names. So, grovel
373          * through the transformed clause and make sure there are no bogus
374          * references.  (Outer references are OK, and are ignored here.)
375          */
376         clause_varnos = pull_varnos(result);
377         clause_varnos = bms_del_members(clause_varnos, containedRels);
378         if ((varno = bms_first_member(clause_varnos)) >= 0)
379         {
380                 ereport(ERROR,
381                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
382                  errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
383                                 rt_fetch(varno, pstate->p_rtable)->eref->aliasname)));
384         }
385         bms_free(clause_varnos);
386
387         return result;
388 }
389
390 /*
391  * transformTableEntry --- transform a RangeVar (simple relation reference)
392  */
393 static RangeTblEntry *
394 transformTableEntry(ParseState *pstate, RangeVar *r)
395 {
396         RangeTblEntry *rte;
397
398         /*
399          * mark this entry to indicate it comes from the FROM clause. In SQL, the
400          * target list can only refer to range variables specified in the from
401          * clause but we follow the more powerful POSTQUEL semantics and
402          * automatically generate the range variable if not specified. However
403          * there are times we need to know whether the entries are legitimate.
404          */
405         rte = addRangeTableEntry(pstate, r, r->alias,
406                                                          interpretInhOption(r->inhOpt), true);
407
408         return rte;
409 }
410
411
412 /*
413  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
414  */
415 static RangeTblEntry *
416 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
417 {
418         List       *parsetrees;
419         Query      *query;
420         RangeTblEntry *rte;
421
422         /*
423          * We require user to supply an alias for a subselect, per SQL92. To relax
424          * this, we'd have to be prepared to gin up a unique alias for an
425          * unlabeled subselect.
426          */
427         if (r->alias == NULL)
428                 ereport(ERROR,
429                                 (errcode(ERRCODE_SYNTAX_ERROR),
430                                  errmsg("subquery in FROM must have an alias")));
431
432         /*
433          * Analyze and transform the subquery.
434          */
435         parsetrees = parse_sub_analyze(r->subquery, pstate);
436
437         /*
438          * Check that we got something reasonable.      Most of these conditions are
439          * probably impossible given restrictions of the grammar, but check 'em
440          * anyway.
441          */
442         if (list_length(parsetrees) != 1)
443                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
444         query = (Query *) linitial(parsetrees);
445         if (query == NULL || !IsA(query, Query))
446                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
447
448         if (query->commandType != CMD_SELECT)
449                 elog(ERROR, "expected SELECT query from subquery in FROM");
450         if (query->resultRelation != 0 || query->into != NULL)
451                 ereport(ERROR,
452                                 (errcode(ERRCODE_SYNTAX_ERROR),
453                                  errmsg("subquery in FROM may not have SELECT INTO")));
454
455         /*
456          * The subquery cannot make use of any variables from FROM items created
457          * earlier in the current query.  Per SQL92, the scope of a FROM item does
458          * not include other FROM items.  Formerly we hacked the namespace so that
459          * the other variables weren't even visible, but it seems more useful to
460          * leave them visible and give a specific error message.
461          *
462          * XXX this will need further work to support SQL99's LATERAL() feature,
463          * wherein such references would indeed be legal.
464          *
465          * We can skip groveling through the subquery if there's not anything
466          * visible in the current query.  Also note that outer references are OK.
467          */
468         if (pstate->p_relnamespace || pstate->p_varnamespace)
469         {
470                 if (contain_vars_of_level((Node *) query, 1))
471                         ereport(ERROR,
472                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
473                                          errmsg("subquery in FROM may not refer to other relations of same query level")));
474         }
475
476         /*
477          * OK, build an RTE for the subquery.
478          */
479         rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
480
481         return rte;
482 }
483
484
485 /*
486  * transformRangeFunction --- transform a function call appearing in FROM
487  */
488 static RangeTblEntry *
489 transformRangeFunction(ParseState *pstate, RangeFunction *r)
490 {
491         Node       *funcexpr;
492         char       *funcname;
493         RangeTblEntry *rte;
494
495         /*
496          * Get function name for possible use as alias.  We use the same
497          * transformation rules as for a SELECT output expression.      For a FuncCall
498          * node, the result will be the function name, but it is possible for the
499          * grammar to hand back other node types.
500          */
501         funcname = FigureColname(r->funccallnode);
502
503         /*
504          * Transform the raw expression.
505          */
506         funcexpr = transformExpr(pstate, r->funccallnode);
507
508         /*
509          * The function parameters cannot make use of any variables from other
510          * FROM items.  (Compare to transformRangeSubselect(); the coding is
511          * different though because we didn't parse as a sub-select with its own
512          * level of namespace.)
513          *
514          * XXX this will need further work to support SQL99's LATERAL() feature,
515          * wherein such references would indeed be legal.
516          */
517         if (pstate->p_relnamespace || pstate->p_varnamespace)
518         {
519                 if (contain_vars_of_level(funcexpr, 0))
520                         ereport(ERROR,
521                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
522                                          errmsg("function expression in FROM may not refer to other relations of same query level")));
523         }
524
525         /*
526          * Disallow aggregate functions in the expression.      (No reason to postpone
527          * this check until parseCheckAggregates.)
528          */
529         if (pstate->p_hasAggs)
530         {
531                 if (checkExprHasAggs(funcexpr))
532                         ereport(ERROR,
533                                         (errcode(ERRCODE_GROUPING_ERROR),
534                                          errmsg("cannot use aggregate function in function expression in FROM")));
535         }
536
537         /*
538          * OK, build an RTE for the function.
539          */
540         rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
541                                                                                 r, true);
542
543         /*
544          * If a coldeflist was supplied, ensure it defines a legal set of names
545          * (no duplicates) and datatypes (no pseudo-types, for instance).
546          * addRangeTableEntryForFunction looked up the type names but didn't
547          * check them further than that.
548          */
549         if (r->coldeflist)
550         {
551                 TupleDesc       tupdesc;
552
553                 tupdesc = BuildDescFromLists(rte->eref->colnames,
554                                                                          rte->funccoltypes,
555                                                                          rte->funccoltypmods);
556                 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
557         }
558
559         return rte;
560 }
561
562
563 /*
564  * transformFromClauseItem -
565  *        Transform a FROM-clause item, adding any required entries to the
566  *        range table list being built in the ParseState, and return the
567  *        transformed item ready to include in the joinlist and namespaces.
568  *        This routine can recurse to handle SQL92 JOIN expressions.
569  *
570  * The function return value is the node to add to the jointree (a
571  * RangeTblRef or JoinExpr).  Additional output parameters are:
572  *
573  * *top_rte: receives the RTE corresponding to the jointree item.
574  * (We could extract this from the function return node, but it saves cycles
575  * to pass it back separately.)
576  *
577  * *top_rti: receives the rangetable index of top_rte.  (Ditto.)
578  *
579  * *relnamespace: receives a List of the RTEs exposed as relation names
580  * by this item.
581  *
582  * *containedRels: receives a bitmap set of the rangetable indexes
583  * of all the base and join relations represented in this jointree item.
584  * This is needed for checking JOIN/ON conditions in higher levels.
585  *
586  * We do not need to pass back an explicit varnamespace value, because
587  * in all cases the varnamespace contribution is exactly top_rte.
588  */
589 static Node *
590 transformFromClauseItem(ParseState *pstate, Node *n,
591                                                 RangeTblEntry **top_rte, int *top_rti,
592                                                 List **relnamespace,
593                                                 Relids *containedRels)
594 {
595         if (IsA(n, RangeVar))
596         {
597                 /* Plain relation reference */
598                 RangeTblRef *rtr;
599                 RangeTblEntry *rte;
600                 int                     rtindex;
601
602                 rte = transformTableEntry(pstate, (RangeVar *) n);
603                 /* assume new rte is at end */
604                 rtindex = list_length(pstate->p_rtable);
605                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
606                 *top_rte = rte;
607                 *top_rti = rtindex;
608                 *relnamespace = list_make1(rte);
609                 *containedRels = bms_make_singleton(rtindex);
610                 rtr = makeNode(RangeTblRef);
611                 rtr->rtindex = rtindex;
612                 return (Node *) rtr;
613         }
614         else if (IsA(n, RangeSubselect))
615         {
616                 /* sub-SELECT is like a plain relation */
617                 RangeTblRef *rtr;
618                 RangeTblEntry *rte;
619                 int                     rtindex;
620
621                 rte = transformRangeSubselect(pstate, (RangeSubselect *) n);
622                 /* assume new rte is at end */
623                 rtindex = list_length(pstate->p_rtable);
624                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
625                 *top_rte = rte;
626                 *top_rti = rtindex;
627                 *relnamespace = list_make1(rte);
628                 *containedRels = bms_make_singleton(rtindex);
629                 rtr = makeNode(RangeTblRef);
630                 rtr->rtindex = rtindex;
631                 return (Node *) rtr;
632         }
633         else if (IsA(n, RangeFunction))
634         {
635                 /* function is like a plain relation */
636                 RangeTblRef *rtr;
637                 RangeTblEntry *rte;
638                 int                     rtindex;
639
640                 rte = transformRangeFunction(pstate, (RangeFunction *) n);
641                 /* assume new rte is at end */
642                 rtindex = list_length(pstate->p_rtable);
643                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
644                 *top_rte = rte;
645                 *top_rti = rtindex;
646                 *relnamespace = list_make1(rte);
647                 *containedRels = bms_make_singleton(rtindex);
648                 rtr = makeNode(RangeTblRef);
649                 rtr->rtindex = rtindex;
650                 return (Node *) rtr;
651         }
652         else if (IsA(n, JoinExpr))
653         {
654                 /* A newfangled join expression */
655                 JoinExpr   *j = (JoinExpr *) n;
656                 RangeTblEntry *l_rte;
657                 RangeTblEntry *r_rte;
658                 int                     l_rtindex;
659                 int                     r_rtindex;
660                 Relids          l_containedRels,
661                                         r_containedRels,
662                                         my_containedRels;
663                 List       *l_relnamespace,
664                                    *r_relnamespace,
665                                    *my_relnamespace,
666                                    *l_colnames,
667                                    *r_colnames,
668                                    *res_colnames,
669                                    *l_colvars,
670                                    *r_colvars,
671                                    *res_colvars;
672                 RangeTblEntry *rte;
673
674                 /*
675                  * Recursively process the left and right subtrees
676                  */
677                 j->larg = transformFromClauseItem(pstate, j->larg,
678                                                                                   &l_rte,
679                                                                                   &l_rtindex,
680                                                                                   &l_relnamespace,
681                                                                                   &l_containedRels);
682                 j->rarg = transformFromClauseItem(pstate, j->rarg,
683                                                                                   &r_rte,
684                                                                                   &r_rtindex,
685                                                                                   &r_relnamespace,
686                                                                                   &r_containedRels);
687
688                 /*
689                  * Check for conflicting refnames in left and right subtrees. Must do
690                  * this because higher levels will assume I hand back a self-
691                  * consistent namespace subtree.
692                  */
693                 checkNameSpaceConflicts(pstate, l_relnamespace, r_relnamespace);
694
695                 /*
696                  * Generate combined relation membership info for possible use by
697                  * transformJoinOnClause below.
698                  */
699                 my_relnamespace = list_concat(l_relnamespace, r_relnamespace);
700                 my_containedRels = bms_join(l_containedRels, r_containedRels);
701
702                 pfree(r_relnamespace);  /* free unneeded list header */
703
704                 /*
705                  * Extract column name and var lists from both subtrees
706                  *
707                  * Note: expandRTE returns new lists, safe for me to modify
708                  */
709                 expandRTE(l_rte, l_rtindex, 0, false,
710                                   &l_colnames, &l_colvars);
711                 expandRTE(r_rte, r_rtindex, 0, false,
712                                   &r_colnames, &r_colvars);
713
714                 /*
715                  * Natural join does not explicitly specify columns; must generate
716                  * columns to join. Need to run through the list of columns from each
717                  * table or join result and match up the column names. Use the first
718                  * table, and check every column in the second table for a match.
719                  * (We'll check that the matches were unique later on.) The result of
720                  * this step is a list of column names just like an explicitly-written
721                  * USING list.
722                  */
723                 if (j->isNatural)
724                 {
725                         List       *rlist = NIL;
726                         ListCell   *lx,
727                                            *rx;
728
729                         Assert(j->using == NIL);        /* shouldn't have USING() too */
730
731                         foreach(lx, l_colnames)
732                         {
733                                 char       *l_colname = strVal(lfirst(lx));
734                                 Value      *m_name = NULL;
735
736                                 foreach(rx, r_colnames)
737                                 {
738                                         char       *r_colname = strVal(lfirst(rx));
739
740                                         if (strcmp(l_colname, r_colname) == 0)
741                                         {
742                                                 m_name = makeString(l_colname);
743                                                 break;
744                                         }
745                                 }
746
747                                 /* matched a right column? then keep as join column... */
748                                 if (m_name != NULL)
749                                         rlist = lappend(rlist, m_name);
750                         }
751
752                         j->using = rlist;
753                 }
754
755                 /*
756                  * Now transform the join qualifications, if any.
757                  */
758                 res_colnames = NIL;
759                 res_colvars = NIL;
760
761                 if (j->using)
762                 {
763                         /*
764                          * JOIN/USING (or NATURAL JOIN, as transformed above). Transform
765                          * the list into an explicit ON-condition, and generate a list of
766                          * merged result columns.
767                          */
768                         List       *ucols = j->using;
769                         List       *l_usingvars = NIL;
770                         List       *r_usingvars = NIL;
771                         ListCell   *ucol;
772
773                         Assert(j->quals == NULL);       /* shouldn't have ON() too */
774
775                         foreach(ucol, ucols)
776                         {
777                                 char       *u_colname = strVal(lfirst(ucol));
778                                 ListCell   *col;
779                                 int                     ndx;
780                                 int                     l_index = -1;
781                                 int                     r_index = -1;
782                                 Var                *l_colvar,
783                                                    *r_colvar;
784
785                                 /* Check for USING(foo,foo) */
786                                 foreach(col, res_colnames)
787                                 {
788                                         char       *res_colname = strVal(lfirst(col));
789
790                                         if (strcmp(res_colname, u_colname) == 0)
791                                                 ereport(ERROR,
792                                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
793                                                                  errmsg("column name \"%s\" appears more than once in USING clause",
794                                                                                 u_colname)));
795                                 }
796
797                                 /* Find it in left input */
798                                 ndx = 0;
799                                 foreach(col, l_colnames)
800                                 {
801                                         char       *l_colname = strVal(lfirst(col));
802
803                                         if (strcmp(l_colname, u_colname) == 0)
804                                         {
805                                                 if (l_index >= 0)
806                                                         ereport(ERROR,
807                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
808                                                                          errmsg("common column name \"%s\" appears more than once in left table",
809                                                                                         u_colname)));
810                                                 l_index = ndx;
811                                         }
812                                         ndx++;
813                                 }
814                                 if (l_index < 0)
815                                         ereport(ERROR,
816                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
817                                                          errmsg("column \"%s\" specified in USING clause does not exist in left table",
818                                                                         u_colname)));
819
820                                 /* Find it in right input */
821                                 ndx = 0;
822                                 foreach(col, r_colnames)
823                                 {
824                                         char       *r_colname = strVal(lfirst(col));
825
826                                         if (strcmp(r_colname, u_colname) == 0)
827                                         {
828                                                 if (r_index >= 0)
829                                                         ereport(ERROR,
830                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
831                                                                          errmsg("common column name \"%s\" appears more than once in right table",
832                                                                                         u_colname)));
833                                                 r_index = ndx;
834                                         }
835                                         ndx++;
836                                 }
837                                 if (r_index < 0)
838                                         ereport(ERROR,
839                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
840                                                          errmsg("column \"%s\" specified in USING clause does not exist in right table",
841                                                                         u_colname)));
842
843                                 l_colvar = list_nth(l_colvars, l_index);
844                                 l_usingvars = lappend(l_usingvars, l_colvar);
845                                 r_colvar = list_nth(r_colvars, r_index);
846                                 r_usingvars = lappend(r_usingvars, r_colvar);
847
848                                 res_colnames = lappend(res_colnames, lfirst(ucol));
849                                 res_colvars = lappend(res_colvars,
850                                                                           buildMergedJoinVar(pstate,
851                                                                                                                  j->jointype,
852                                                                                                                  l_colvar,
853                                                                                                                  r_colvar));
854                         }
855
856                         j->quals = transformJoinUsingClause(pstate,
857                                                                                                 l_usingvars,
858                                                                                                 r_usingvars);
859                 }
860                 else if (j->quals)
861                 {
862                         /* User-written ON-condition; transform it */
863                         j->quals = transformJoinOnClause(pstate, j,
864                                                                                          l_rte, r_rte,
865                                                                                          my_relnamespace,
866                                                                                          my_containedRels);
867                 }
868                 else
869                 {
870                         /* CROSS JOIN: no quals */
871                 }
872
873                 /* Add remaining columns from each side to the output columns */
874                 extractRemainingColumns(res_colnames,
875                                                                 l_colnames, l_colvars,
876                                                                 &l_colnames, &l_colvars);
877                 extractRemainingColumns(res_colnames,
878                                                                 r_colnames, r_colvars,
879                                                                 &r_colnames, &r_colvars);
880                 res_colnames = list_concat(res_colnames, l_colnames);
881                 res_colvars = list_concat(res_colvars, l_colvars);
882                 res_colnames = list_concat(res_colnames, r_colnames);
883                 res_colvars = list_concat(res_colvars, r_colvars);
884
885                 /*
886                  * Check alias (AS clause), if any.
887                  */
888                 if (j->alias)
889                 {
890                         if (j->alias->colnames != NIL)
891                         {
892                                 if (list_length(j->alias->colnames) > list_length(res_colnames))
893                                         ereport(ERROR,
894                                                         (errcode(ERRCODE_SYNTAX_ERROR),
895                                                          errmsg("column alias list for \"%s\" has too many entries",
896                                                                         j->alias->aliasname)));
897                         }
898                 }
899
900                 /*
901                  * Now build an RTE for the result of the join
902                  */
903                 rte = addRangeTableEntryForJoin(pstate,
904                                                                                 res_colnames,
905                                                                                 j->jointype,
906                                                                                 res_colvars,
907                                                                                 j->alias,
908                                                                                 true);
909
910                 /* assume new rte is at end */
911                 j->rtindex = list_length(pstate->p_rtable);
912                 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
913
914                 *top_rte = rte;
915                 *top_rti = j->rtindex;
916
917                 /*
918                  * Prepare returned namespace list.  If the JOIN has an alias then it
919                  * hides the contained RTEs as far as the relnamespace goes;
920                  * otherwise, put the contained RTEs and *not* the JOIN into
921                  * relnamespace.
922                  */
923                 if (j->alias)
924                 {
925                         *relnamespace = list_make1(rte);
926                         list_free(my_relnamespace);
927                 }
928                 else
929                         *relnamespace = my_relnamespace;
930
931                 /*
932                  * Include join RTE in returned containedRels set
933                  */
934                 *containedRels = bms_add_member(my_containedRels, j->rtindex);
935
936                 return (Node *) j;
937         }
938         else
939                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
940         return NULL;                            /* can't get here, keep compiler quiet */
941 }
942
943 /*
944  * buildMergedJoinVar -
945  *        generate a suitable replacement expression for a merged join column
946  */
947 static Node *
948 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
949                                    Var *l_colvar, Var *r_colvar)
950 {
951         Oid                     outcoltype;
952         int32           outcoltypmod;
953         Node       *l_node,
954                            *r_node,
955                            *res_node;
956
957         /*
958          * Choose output type if input types are dissimilar.
959          */
960         outcoltype = l_colvar->vartype;
961         outcoltypmod = l_colvar->vartypmod;
962         if (outcoltype != r_colvar->vartype)
963         {
964                 outcoltype = select_common_type(list_make2_oid(l_colvar->vartype,
965                                                                                                            r_colvar->vartype),
966                                                                                 "JOIN/USING");
967                 outcoltypmod = -1;              /* ie, unknown */
968         }
969         else if (outcoltypmod != r_colvar->vartypmod)
970         {
971                 /* same type, but not same typmod */
972                 outcoltypmod = -1;              /* ie, unknown */
973         }
974
975         /*
976          * Insert coercion functions if needed.  Note that a difference in typmod
977          * can only happen if input has typmod but outcoltypmod is -1. In that
978          * case we insert a RelabelType to clearly mark that result's typmod is
979          * not same as input.  We never need coerce_type_typmod.
980          */
981         if (l_colvar->vartype != outcoltype)
982                 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
983                                                          outcoltype, outcoltypmod,
984                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
985         else if (l_colvar->vartypmod != outcoltypmod)
986                 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
987                                                                                   outcoltype, outcoltypmod,
988                                                                                   COERCE_IMPLICIT_CAST);
989         else
990                 l_node = (Node *) l_colvar;
991
992         if (r_colvar->vartype != outcoltype)
993                 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
994                                                          outcoltype, outcoltypmod,
995                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
996         else if (r_colvar->vartypmod != outcoltypmod)
997                 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
998                                                                                   outcoltype, outcoltypmod,
999                                                                                   COERCE_IMPLICIT_CAST);
1000         else
1001                 r_node = (Node *) r_colvar;
1002
1003         /*
1004          * Choose what to emit
1005          */
1006         switch (jointype)
1007         {
1008                 case JOIN_INNER:
1009
1010                         /*
1011                          * We can use either var; prefer non-coerced one if available.
1012                          */
1013                         if (IsA(l_node, Var))
1014                                 res_node = l_node;
1015                         else if (IsA(r_node, Var))
1016                                 res_node = r_node;
1017                         else
1018                                 res_node = l_node;
1019                         break;
1020                 case JOIN_LEFT:
1021                         /* Always use left var */
1022                         res_node = l_node;
1023                         break;
1024                 case JOIN_RIGHT:
1025                         /* Always use right var */
1026                         res_node = r_node;
1027                         break;
1028                 case JOIN_FULL:
1029                         {
1030                                 /*
1031                                  * Here we must build a COALESCE expression to ensure that the
1032                                  * join output is non-null if either input is.
1033                                  */
1034                                 CoalesceExpr *c = makeNode(CoalesceExpr);
1035
1036                                 c->coalescetype = outcoltype;
1037                                 c->args = list_make2(l_node, r_node);
1038                                 res_node = (Node *) c;
1039                                 break;
1040                         }
1041                 default:
1042                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
1043                         res_node = NULL;        /* keep compiler quiet */
1044                         break;
1045         }
1046
1047         return res_node;
1048 }
1049
1050
1051 /*
1052  * transformWhereClause -
1053  *        Transform the qualification and make sure it is of type boolean.
1054  *        Used for WHERE and allied clauses.
1055  *
1056  * constructName does not affect the semantics, but is used in error messages
1057  */
1058 Node *
1059 transformWhereClause(ParseState *pstate, Node *clause,
1060                                          const char *constructName)
1061 {
1062         Node       *qual;
1063
1064         if (clause == NULL)
1065                 return NULL;
1066
1067         qual = transformExpr(pstate, clause);
1068
1069         qual = coerce_to_boolean(pstate, qual, constructName);
1070
1071         return qual;
1072 }
1073
1074
1075 /*
1076  * transformLimitClause -
1077  *        Transform the expression and make sure it is of type integer.
1078  *        Used for LIMIT and allied clauses.
1079  *
1080  * constructName does not affect the semantics, but is used in error messages
1081  */
1082 Node *
1083 transformLimitClause(ParseState *pstate, Node *clause,
1084                                          const char *constructName)
1085 {
1086         Node       *qual;
1087
1088         if (clause == NULL)
1089                 return NULL;
1090
1091         qual = transformExpr(pstate, clause);
1092
1093         qual = coerce_to_integer64(pstate, qual, constructName);
1094
1095         /*
1096          * LIMIT can't refer to any vars or aggregates of the current query; we
1097          * don't allow subselects either (though that case would at least be
1098          * sensible)
1099          */
1100         if (contain_vars_of_level(qual, 0))
1101         {
1102                 ereport(ERROR,
1103                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1104                 /* translator: %s is name of a SQL construct, eg LIMIT */
1105                                  errmsg("argument of %s must not contain variables",
1106                                                 constructName)));
1107         }
1108         if (checkExprHasAggs(qual))
1109         {
1110                 ereport(ERROR,
1111                                 (errcode(ERRCODE_GROUPING_ERROR),
1112                 /* translator: %s is name of a SQL construct, eg LIMIT */
1113                                  errmsg("argument of %s must not contain aggregates",
1114                                                 constructName)));
1115         }
1116         if (contain_subplans(qual))
1117         {
1118                 ereport(ERROR,
1119                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1120                 /* translator: %s is name of a SQL construct, eg LIMIT */
1121                                  errmsg("argument of %s must not contain subqueries",
1122                                                 constructName)));
1123         }
1124
1125         return qual;
1126 }
1127
1128
1129 /*
1130  *      findTargetlistEntry -
1131  *        Returns the targetlist entry matching the given (untransformed) node.
1132  *        If no matching entry exists, one is created and appended to the target
1133  *        list as a "resjunk" node.
1134  *
1135  * node         the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1136  * tlist        the target list (passed by reference so we can append to it)
1137  * clause       identifies clause type being processed
1138  */
1139 static TargetEntry *
1140 findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
1141 {
1142         TargetEntry *target_result = NULL;
1143         ListCell   *tl;
1144         Node       *expr;
1145
1146         /*----------
1147          * Handle two special cases as mandated by the SQL92 spec:
1148          *
1149          * 1. Bare ColumnName (no qualifier or subscripts)
1150          *        For a bare identifier, we search for a matching column name
1151          *        in the existing target list.  Multiple matches are an error
1152          *        unless they refer to identical values; for example,
1153          *        we allow      SELECT a, a FROM table ORDER BY a
1154          *        but not       SELECT a AS b, b FROM table ORDER BY b
1155          *        If no match is found, we fall through and treat the identifier
1156          *        as an expression.
1157          *        For GROUP BY, it is incorrect to match the grouping item against
1158          *        targetlist entries: according to SQL92, an identifier in GROUP BY
1159          *        is a reference to a column name exposed by FROM, not to a target
1160          *        list column.  However, many implementations (including pre-7.0
1161          *        PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1162          *        to see if the identifier matches any FROM column name, and only
1163          *        try for a targetlist name if it doesn't.  This ensures that we
1164          *        adhere to the spec in the case where the name could be both.
1165          *        DISTINCT ON isn't in the standard, so we can do what we like there;
1166          *        we choose to make it work like ORDER BY, on the rather flimsy
1167          *        grounds that ordinary DISTINCT works on targetlist entries.
1168          *
1169          * 2. IntegerConstant
1170          *        This means to use the n'th item in the existing target list.
1171          *        Note that it would make no sense to order/group/distinct by an
1172          *        actual constant, so this does not create a conflict with our
1173          *        extension to order/group by an expression.
1174          *        GROUP BY column-number is not allowed by SQL92, but since
1175          *        the standard has no other behavior defined for this syntax,
1176          *        we may as well accept this common extension.
1177          *
1178          * Note that pre-existing resjunk targets must not be used in either case,
1179          * since the user didn't write them in his SELECT list.
1180          *
1181          * If neither special case applies, fall through to treat the item as
1182          * an expression.
1183          *----------
1184          */
1185         if (IsA(node, ColumnRef) &&
1186                 list_length(((ColumnRef *) node)->fields) == 1)
1187         {
1188                 char       *name = strVal(linitial(((ColumnRef *) node)->fields));
1189                 int                     location = ((ColumnRef *) node)->location;
1190
1191                 if (clause == GROUP_CLAUSE)
1192                 {
1193                         /*
1194                          * In GROUP BY, we must prefer a match against a FROM-clause
1195                          * column to one against the targetlist.  Look to see if there is
1196                          * a matching column.  If so, fall through to let transformExpr()
1197                          * do the rest.  NOTE: if name could refer ambiguously to more
1198                          * than one column name exposed by FROM, colNameToVar will
1199                          * ereport(ERROR).      That's just what we want here.
1200                          *
1201                          * Small tweak for 7.4.3: ignore matches in upper query levels.
1202                          * This effectively changes the search order for bare names to (1)
1203                          * local FROM variables, (2) local targetlist aliases, (3) outer
1204                          * FROM variables, whereas before it was (1) (3) (2). SQL92 and
1205                          * SQL99 do not allow GROUPing BY an outer reference, so this
1206                          * breaks no cases that are legal per spec, and it seems a more
1207                          * self-consistent behavior.
1208                          */
1209                         if (colNameToVar(pstate, name, true, location) != NULL)
1210                                 name = NULL;
1211                 }
1212
1213                 if (name != NULL)
1214                 {
1215                         foreach(tl, *tlist)
1216                         {
1217                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1218
1219                                 if (!tle->resjunk &&
1220                                         strcmp(tle->resname, name) == 0)
1221                                 {
1222                                         if (target_result != NULL)
1223                                         {
1224                                                 if (!equal(target_result->expr, tle->expr))
1225                                                         ereport(ERROR,
1226                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1227
1228                                                         /*
1229                                                          * translator: first %s is name of a SQL
1230                                                          * construct, eg ORDER BY
1231                                                          */
1232                                                                          errmsg("%s \"%s\" is ambiguous",
1233                                                                                         clauseText[clause], name),
1234                                                                          parser_errposition(pstate, location)));
1235                                         }
1236                                         else
1237                                                 target_result = tle;
1238                                         /* Stay in loop to check for ambiguity */
1239                                 }
1240                         }
1241                         if (target_result != NULL)
1242                                 return target_result;   /* return the first match */
1243                 }
1244         }
1245         if (IsA(node, A_Const))
1246         {
1247                 Value      *val = &((A_Const *) node)->val;
1248                 int                     targetlist_pos = 0;
1249                 int                     target_pos;
1250
1251                 if (!IsA(val, Integer))
1252                         ereport(ERROR,
1253                                         (errcode(ERRCODE_SYNTAX_ERROR),
1254                         /* translator: %s is name of a SQL construct, eg ORDER BY */
1255                                          errmsg("non-integer constant in %s",
1256                                                         clauseText[clause])));
1257                 target_pos = intVal(val);
1258                 foreach(tl, *tlist)
1259                 {
1260                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1261
1262                         if (!tle->resjunk)
1263                         {
1264                                 if (++targetlist_pos == target_pos)
1265                                         return tle; /* return the unique match */
1266                         }
1267                 }
1268                 ereport(ERROR,
1269                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1270                 /* translator: %s is name of a SQL construct, eg ORDER BY */
1271                                  errmsg("%s position %d is not in select list",
1272                                                 clauseText[clause], target_pos)));
1273         }
1274
1275         /*
1276          * Otherwise, we have an expression (this is a Postgres extension not
1277          * found in SQL92).  Convert the untransformed node to a transformed
1278          * expression, and search for a match in the tlist. NOTE: it doesn't
1279          * really matter whether there is more than one match.  Also, we are
1280          * willing to match a resjunk target here, though the above cases must
1281          * ignore resjunk targets.
1282          */
1283         expr = transformExpr(pstate, node);
1284
1285         foreach(tl, *tlist)
1286         {
1287                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1288
1289                 if (equal(expr, tle->expr))
1290                         return tle;
1291         }
1292
1293         /*
1294          * If no matches, construct a new target entry which is appended to the
1295          * end of the target list.      This target is given resjunk = TRUE so that it
1296          * will not be projected into the final tuple.
1297          */
1298         target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1299
1300         *tlist = lappend(*tlist, target_result);
1301
1302         return target_result;
1303 }
1304
1305 static GroupClause *
1306 make_group_clause(TargetEntry *tle, List *targetlist, Oid sortop)
1307 {
1308         GroupClause *result;
1309
1310         result = makeNode(GroupClause);
1311         result->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1312         result->sortop = sortop;
1313         return result;
1314 }
1315
1316 /*
1317  * transformGroupClause -
1318  *        transform a GROUP BY clause
1319  *
1320  * GROUP BY items will be added to the targetlist (as resjunk columns)
1321  * if not already present, so the targetlist must be passed by reference.
1322  *
1323  * The order of the elements of the grouping clause does not affect
1324  * the semantics of the query. However, the optimizer is not currently
1325  * smart enough to reorder the grouping clause, so we try to do some
1326  * primitive reordering here.
1327  */
1328 List *
1329 transformGroupClause(ParseState *pstate, List *grouplist,
1330                                          List **targetlist, List *sortClause)
1331 {
1332         List       *result = NIL;
1333         List       *tle_list = NIL;
1334         ListCell   *l;
1335
1336         /* Preprocess the grouping clause, lookup TLEs */
1337         foreach (l, grouplist)
1338         {
1339                 TargetEntry *tle;
1340                 Oid                      restype;
1341
1342                 tle = findTargetlistEntry(pstate, lfirst(l),
1343                                                                   targetlist, GROUP_CLAUSE);
1344
1345                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1346                 restype = exprType((Node *) tle->expr);
1347
1348                 if (restype == UNKNOWNOID)
1349                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1350                                                                                          restype, TEXTOID, -1,
1351                                                                                          COERCION_IMPLICIT,
1352                                                                                          COERCE_IMPLICIT_CAST);
1353
1354                 tle_list = lappend(tle_list, tle);
1355         }
1356
1357         /*
1358          * Now iterate through the ORDER BY clause. If we find a grouping
1359          * element that matches the ORDER BY element, append the grouping
1360          * element to the result set immediately. Otherwise, stop
1361          * iterating. The effect of this is to look for a prefix of the
1362          * ORDER BY list in the grouping clauses, and to move that prefix
1363          * to the front of the GROUP BY.
1364          */
1365         foreach (l, sortClause)
1366         {
1367                 SortClause      *sc = (SortClause *) lfirst(l);
1368                 ListCell        *prev = NULL;
1369                 ListCell        *tl;
1370                 bool             found = false;
1371
1372                 foreach (tl, tle_list)
1373                 {
1374                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1375
1376                         if (sc->tleSortGroupRef == tle->ressortgroupref)
1377                         {
1378                                 GroupClause *gc;
1379
1380                                 tle_list = list_delete_cell(tle_list, tl, prev);
1381
1382                                 /* Use the sort clause's sorting operator */
1383                                 gc = make_group_clause(tle, *targetlist, sc->sortop);
1384                                 result = lappend(result, gc);
1385                                 found = true;
1386                                 break;
1387                         }
1388
1389                         prev = tl;
1390                 }
1391
1392                 /* As soon as we've failed to match an ORDER BY element, stop */
1393                 if (!found)
1394                         break;
1395         }
1396
1397         /*
1398          * Now add any remaining elements of the GROUP BY list in the
1399          * order we received them.
1400          *
1401          * XXX: are there any additional criteria to consider when
1402          * ordering grouping clauses?
1403          */
1404         foreach(l, tle_list)
1405         {
1406                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1407                 GroupClause *gc;
1408                 Oid                      sort_op;
1409
1410                 /* avoid making duplicate grouplist entries */
1411                 if (targetIsInSortList(tle, result))
1412                         continue;
1413
1414                 sort_op = ordering_oper_opid(exprType((Node *) tle->expr));
1415                 gc = make_group_clause(tle, *targetlist, sort_op);
1416                 result = lappend(result, gc);
1417         }
1418
1419         list_free(tle_list);
1420         return result;
1421 }
1422
1423 /*
1424  * transformSortClause -
1425  *        transform an ORDER BY clause
1426  *
1427  * ORDER BY items will be added to the targetlist (as resjunk columns)
1428  * if not already present, so the targetlist must be passed by reference.
1429  */
1430 List *
1431 transformSortClause(ParseState *pstate,
1432                                         List *orderlist,
1433                                         List **targetlist,
1434                                         bool resolveUnknown)
1435 {
1436         List       *sortlist = NIL;
1437         ListCell   *olitem;
1438
1439         foreach(olitem, orderlist)
1440         {
1441                 SortBy     *sortby = lfirst(olitem);
1442                 TargetEntry *tle;
1443
1444                 tle = findTargetlistEntry(pstate, sortby->node,
1445                                                                   targetlist, ORDER_CLAUSE);
1446
1447                 sortlist = addTargetToSortList(pstate, tle,
1448                                                                            sortlist, *targetlist,
1449                                                                            sortby->sortby_kind,
1450                                                                            sortby->useOp,
1451                                                                            resolveUnknown);
1452         }
1453
1454         return sortlist;
1455 }
1456
1457 /*
1458  * transformDistinctClause -
1459  *        transform a DISTINCT or DISTINCT ON clause
1460  *
1461  * Since we may need to add items to the query's sortClause list, that list
1462  * is passed by reference.      Likewise for the targetlist.
1463  */
1464 List *
1465 transformDistinctClause(ParseState *pstate, List *distinctlist,
1466                                                 List **targetlist, List **sortClause)
1467 {
1468         List       *result = NIL;
1469         ListCell   *slitem;
1470         ListCell   *dlitem;
1471
1472         /* No work if there was no DISTINCT clause */
1473         if (distinctlist == NIL)
1474                 return NIL;
1475
1476         if (linitial(distinctlist) == NULL)
1477         {
1478                 /* We had SELECT DISTINCT */
1479
1480                 /*
1481                  * All non-resjunk elements from target list that are not already in
1482                  * the sort list should be added to it.  (We don't really care what
1483                  * order the DISTINCT fields are checked in, so we can leave the
1484                  * user's ORDER BY spec alone, and just add additional sort keys to it
1485                  * to ensure that all targetlist items get sorted.)
1486                  */
1487                 *sortClause = addAllTargetsToSortList(pstate,
1488                                                                                           *sortClause,
1489                                                                                           *targetlist,
1490                                                                                           true);
1491
1492                 /*
1493                  * Now, DISTINCT list consists of all non-resjunk sortlist items.
1494                  * Actually, all the sortlist items had better be non-resjunk!
1495                  * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that
1496                  * does not appear anywhere in the SELECT targetlist, and we can't
1497                  * implement that with only one sorting pass...
1498                  */
1499                 foreach(slitem, *sortClause)
1500                 {
1501                         SortClause *scl = (SortClause *) lfirst(slitem);
1502                         TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
1503
1504                         if (tle->resjunk)
1505                                 ereport(ERROR,
1506                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1507                                                  errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
1508                         else
1509                                 result = lappend(result, copyObject(scl));
1510                 }
1511         }
1512         else
1513         {
1514                 /* We had SELECT DISTINCT ON (expr, ...) */
1515
1516                 /*
1517                  * If the user writes both DISTINCT ON and ORDER BY, then the two
1518                  * expression lists must match (until one or the other runs out).
1519                  * Otherwise the ORDER BY requires a different sort order than the
1520                  * DISTINCT does, and we can't implement that with only one sort pass
1521                  * (and if we do two passes, the results will be rather
1522                  * unpredictable). However, it's OK to have more DISTINCT ON
1523                  * expressions than ORDER BY expressions; we can just add the extra
1524                  * DISTINCT values to the sort list, much as we did above for ordinary
1525                  * DISTINCT fields.
1526                  *
1527                  * Actually, it'd be OK for the common prefixes of the two lists to
1528                  * match in any order, but implementing that check seems like more
1529                  * trouble than it's worth.
1530                  */
1531                 ListCell   *nextsortlist = list_head(*sortClause);
1532
1533                 foreach(dlitem, distinctlist)
1534                 {
1535                         TargetEntry *tle;
1536
1537                         tle = findTargetlistEntry(pstate, lfirst(dlitem),
1538                                                                           targetlist, DISTINCT_ON_CLAUSE);
1539
1540                         if (nextsortlist != NULL)
1541                         {
1542                                 SortClause *scl = (SortClause *) lfirst(nextsortlist);
1543
1544                                 if (tle->ressortgroupref != scl->tleSortGroupRef)
1545                                         ereport(ERROR,
1546                                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1547                                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1548                                 result = lappend(result, copyObject(scl));
1549                                 nextsortlist = lnext(nextsortlist);
1550                         }
1551                         else
1552                         {
1553                                 *sortClause = addTargetToSortList(pstate, tle,
1554                                                                                                   *sortClause, *targetlist,
1555                                                                                                   SORTBY_ASC, NIL, true);
1556
1557                                 /*
1558                                  * Probably, the tle should always have been added at the end
1559                                  * of the sort list ... but search to be safe.
1560                                  */
1561                                 foreach(slitem, *sortClause)
1562                                 {
1563                                         SortClause *scl = (SortClause *) lfirst(slitem);
1564
1565                                         if (tle->ressortgroupref == scl->tleSortGroupRef)
1566                                         {
1567                                                 result = lappend(result, copyObject(scl));
1568                                                 break;
1569                                         }
1570                                 }
1571                                 if (slitem == NULL)             /* should not happen */
1572                                         elog(ERROR, "failed to add DISTINCT ON clause to target list");
1573                         }
1574                 }
1575         }
1576
1577         return result;
1578 }
1579
1580 /*
1581  * addAllTargetsToSortList
1582  *              Make sure all non-resjunk targets in the targetlist are in the
1583  *              ORDER BY list, adding the not-yet-sorted ones to the end of the list.
1584  *              This is typically used to help implement SELECT DISTINCT.
1585  *
1586  * See addTargetToSortList for info about pstate and resolveUnknown inputs.
1587  *
1588  * Returns the updated ORDER BY list.
1589  */
1590 List *
1591 addAllTargetsToSortList(ParseState *pstate, List *sortlist,
1592                                                 List *targetlist, bool resolveUnknown)
1593 {
1594         ListCell   *l;
1595
1596         foreach(l, targetlist)
1597         {
1598                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1599
1600                 if (!tle->resjunk)
1601                         sortlist = addTargetToSortList(pstate, tle,
1602                                                                                    sortlist, targetlist,
1603                                                                                    SORTBY_ASC, NIL,
1604                                                                                    resolveUnknown);
1605         }
1606         return sortlist;
1607 }
1608
1609 /*
1610  * addTargetToSortList
1611  *              If the given targetlist entry isn't already in the ORDER BY list,
1612  *              add it to the end of the list, using the sortop with given name
1613  *              or the default sort operator if opname == NIL.
1614  *
1615  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1616  * do nothing (which implies the search for a sort operator will fail).
1617  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1618  * otherwise.
1619  *
1620  * Returns the updated ORDER BY list.
1621  */
1622 List *
1623 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1624                                         List *sortlist, List *targetlist,
1625                                         int sortby_kind, List *sortby_opname,
1626                                         bool resolveUnknown)
1627 {
1628         /* avoid making duplicate sortlist entries */
1629         if (!targetIsInSortList(tle, sortlist))
1630         {
1631                 SortClause *sortcl = makeNode(SortClause);
1632                 Oid                     restype = exprType((Node *) tle->expr);
1633
1634                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1635                 if (restype == UNKNOWNOID && resolveUnknown)
1636                 {
1637                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1638                                                                                          restype, TEXTOID, -1,
1639                                                                                          COERCION_IMPLICIT,
1640                                                                                          COERCE_IMPLICIT_CAST);
1641                         restype = TEXTOID;
1642                 }
1643
1644                 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1645
1646                 switch (sortby_kind)
1647                 {
1648                         case SORTBY_ASC:
1649                                 sortcl->sortop = ordering_oper_opid(restype);
1650                                 break;
1651                         case SORTBY_DESC:
1652                                 sortcl->sortop = reverse_ordering_oper_opid(restype);
1653                                 break;
1654                         case SORTBY_USING:
1655                                 Assert(sortby_opname != NIL);
1656                                 sortcl->sortop = compatible_oper_opid(sortby_opname,
1657                                                                                                           restype,
1658                                                                                                           restype,
1659                                                                                                           false);
1660                                 break;
1661                         default:
1662                                 elog(ERROR, "unrecognized sortby_kind: %d", sortby_kind);
1663                                 break;
1664                 }
1665
1666                 sortlist = lappend(sortlist, sortcl);
1667         }
1668         return sortlist;
1669 }
1670
1671 /*
1672  * assignSortGroupRef
1673  *        Assign the targetentry an unused ressortgroupref, if it doesn't
1674  *        already have one.  Return the assigned or pre-existing refnumber.
1675  *
1676  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1677  */
1678 Index
1679 assignSortGroupRef(TargetEntry *tle, List *tlist)
1680 {
1681         Index           maxRef;
1682         ListCell   *l;
1683
1684         if (tle->ressortgroupref)       /* already has one? */
1685                 return tle->ressortgroupref;
1686
1687         /* easiest way to pick an unused refnumber: max used + 1 */
1688         maxRef = 0;
1689         foreach(l, tlist)
1690         {
1691                 Index           ref = ((TargetEntry *) lfirst(l))->ressortgroupref;
1692
1693                 if (ref > maxRef)
1694                         maxRef = ref;
1695         }
1696         tle->ressortgroupref = maxRef + 1;
1697         return tle->ressortgroupref;
1698 }
1699
1700 /*
1701  * targetIsInSortList
1702  *              Is the given target item already in the sortlist?
1703  *
1704  * Works for both SortClause and GroupClause lists.  Note that the main
1705  * reason we need this routine (and not just a quick test for nonzeroness
1706  * of ressortgroupref) is that a TLE might be in only one of the lists.
1707  */
1708 bool
1709 targetIsInSortList(TargetEntry *tle, List *sortList)
1710 {
1711         Index           ref = tle->ressortgroupref;
1712         ListCell   *l;
1713
1714         /* no need to scan list if tle has no marker */
1715         if (ref == 0)
1716                 return false;
1717
1718         foreach(l, sortList)
1719         {
1720                 SortClause *scl = (SortClause *) lfirst(l);
1721
1722                 if (scl->tleSortGroupRef == ref)
1723                         return true;
1724         }
1725         return false;
1726 }