granicus.if.org Git - postgresql/blob - src/backend/parser/parse_clause.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * parse_clause.c
   4  *        handle clauses in parser
   5  *
   6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.133 2004/06/16 01:26:44 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "access/heapam.h"
  19 #include "catalog/heap.h"
  20 #include "nodes/makefuncs.h"
  21 #include "optimizer/clauses.h"
  22 #include "optimizer/tlist.h"
  23 #include "optimizer/var.h"
  24 #include "parser/analyze.h"
  25 #include "parser/parsetree.h"
  26 #include "parser/parse_clause.h"
  27 #include "parser/parse_coerce.h"
  28 #include "parser/parse_expr.h"
  29 #include "parser/parse_oper.h"
  30 #include "parser/parse_relation.h"
  31 #include "parser/parse_target.h"
  32 #include "parser/parse_type.h"
  33 #include "rewrite/rewriteManip.h"
  34 #include "utils/builtins.h"
  35 #include "utils/guc.h"
  36
  37
  38 #define ORDER_CLAUSE 0
  39 #define GROUP_CLAUSE 1
  40 #define DISTINCT_ON_CLAUSE 2
  41
  42 static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"};
  43
  44 static void extractRemainingColumns(List *common_colnames,
  45                                                 List *src_colnames, List *src_colvars,
  46                                                 List **res_colnames, List **res_colvars);
  47 static Node *transformJoinUsingClause(ParseState *pstate,
  48                                                  List *leftVars, List *rightVars);
  49 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
  50                                           List *containedRels);
  51 static RangeTblRef *transformTableEntry(ParseState *pstate, RangeVar *r);
  52 static RangeTblRef *transformRangeSubselect(ParseState *pstate,
  53                                                 RangeSubselect *r);
  54 static RangeTblRef *transformRangeFunction(ParseState *pstate,
  55                                            RangeFunction *r);
  56 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
  57                                                 List **containedRels);
  58 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
  59                                    Var *l_colvar, Var *r_colvar);
  60 static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
  61                                         List **tlist, int clause);
  62
  63
  64 /*
  65  * transformFromClause -
  66  *        Process the FROM clause and add items to the query's range table,
  67  *        joinlist, and namespace.
  68  *
  69  * Note: we assume that pstate's p_rtable, p_joinlist, and p_namespace lists
  70  * were initialized to NIL when the pstate was created.  We will add onto
  71  * any entries already present --- this is needed for rule processing, as
  72  * well as for UPDATE and DELETE.
  73  *
  74  * The range table may grow still further when we transform the expressions
  75  * in the query's quals and target list. (This is possible because in
  76  * POSTQUEL, we allowed references to relations not specified in the
  77  * from-clause.  PostgreSQL keeps this extension to standard SQL.)
  78  */
  79 void
  80 transformFromClause(ParseState *pstate, List *frmList)
  81 {
  82         ListCell   *fl;
  83
  84         /*
  85          * The grammar will have produced a list of RangeVars,
  86          * RangeSubselects, RangeFunctions, and/or JoinExprs. Transform each
  87          * one (possibly adding entries to the rtable), check for duplicate
  88          * refnames, and then add it to the joinlist and namespace.
  89          */
  90         foreach(fl, frmList)
  91         {
  92                 Node       *n = lfirst(fl);
  93                 List       *containedRels;
  94
  95                 n = transformFromClauseItem(pstate, n, &containedRels);
  96                 checkNameSpaceConflicts(pstate, (Node *) pstate->p_namespace, n);
  97                 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
  98                 pstate->p_namespace = lappend(pstate->p_namespace, n);
  99         }
 100 }
 101
 102 /*
 103  * setTargetTable
 104  *        Add the target relation of INSERT/UPDATE/DELETE to the range table,
 105  *        and make the special links to it in the ParseState.
 106  *
 107  *        We also open the target relation and acquire a write lock on it.
 108  *        This must be done before processing the FROM list, in case the target
 109  *        is also mentioned as a source relation --- we want to be sure to grab
 110  *        the write lock before any read lock.
 111  *
 112  *        If alsoSource is true, add the target to the query's joinlist and
 113  *        namespace.  For INSERT, we don't want the target to be joined to;
 114  *        it's a destination of tuples, not a source.   For UPDATE/DELETE,
 115  *        we do need to scan or join the target.  (NOTE: we do not bother
 116  *        to check for namespace conflict; we assume that the namespace was
 117  *        initially empty in these cases.)
 118  *
 119  *        Finally, we mark the relation as requiring the permissions specified
 120  *        by requiredPerms.
 121  *
 122  *        Returns the rangetable index of the target relation.
 123  */
 124 int
 125 setTargetTable(ParseState *pstate, RangeVar *relation,
 126                            bool inh, bool alsoSource, AclMode requiredPerms)
 127 {
 128         RangeTblEntry *rte;
 129         int                     rtindex;
 130
 131         /* Close old target; this could only happen for multi-action rules */
 132         if (pstate->p_target_relation != NULL)
 133                 heap_close(pstate->p_target_relation, NoLock);
 134
 135         /*
 136          * Open target rel and grab suitable lock (which we will hold till end
 137          * of transaction).
 138          *
 139          * analyze.c will eventually do the corresponding heap_close(), but *not*
 140          * release the lock.
 141          */
 142         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
 143
 144         /*
 145          * Now build an RTE.
 146          */
 147         rte = addRangeTableEntry(pstate, relation, NULL, inh, false);
 148         pstate->p_target_rangetblentry = rte;
 149
 150         /* assume new rte is at end */
 151         rtindex = list_length(pstate->p_rtable);
 152         Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 153
 154         /*
 155          * Override addRangeTableEntry's default ACL_SELECT permissions check,
 156          * and instead mark target table as requiring exactly the specified
 157          * permissions.
 158          *
 159          * If we find an explicit reference to the rel later during parse
 160          * analysis, scanRTEForColumn will add the ACL_SELECT bit back again.
 161          * That can't happen for INSERT but it is possible for UPDATE and DELETE.
 162          */
 163         rte->requiredPerms = requiredPerms;
 164
 165         /*
 166          * If UPDATE/DELETE, add table to joinlist and namespace.
 167          */
 168         if (alsoSource)
 169                 addRTEtoQuery(pstate, rte, true, true);
 170
 171         return rtindex;
 172 }
 173
 174 /*
 175  * Simplify InhOption (yes/no/default) into boolean yes/no.
 176  *
 177  * The reason we do things this way is that we don't want to examine the
 178  * SQL_inheritance option flag until parse_analyze is run.      Otherwise,
 179  * we'd do the wrong thing with query strings that intermix SET commands
 180  * with queries.
 181  */
 182 bool
 183 interpretInhOption(InhOption inhOpt)
 184 {
 185         switch (inhOpt)
 186         {
 187                 case INH_NO:
 188                         return false;
 189                 case INH_YES:
 190                         return true;
 191                 case INH_DEFAULT:
 192                         return SQL_inheritance;
 193         }
 194         elog(ERROR, "bogus InhOption value: %d", inhOpt);
 195         return false;                           /* keep compiler quiet */
 196 }
 197
 198 /*
 199  * Given an enum that indicates whether WITH / WITHOUT OIDS was
 200  * specified by the user, return true iff the specified table/result
 201  * set should be created with OIDs. This needs to be done after
 202  * parsing the query string because the return value can depend upon
 203  * the default_with_oids GUC var.
 204  */
 205 bool
 206 interpretOidsOption(ContainsOids opt)
 207 {
 208         switch (opt)
 209         {
 210                 case MUST_HAVE_OIDS:
 211                         return true;
 212
 213                 case MUST_NOT_HAVE_OIDS:
 214                         return false;
 215
 216                 case DEFAULT_OIDS:
 217                         return default_with_oids;
 218         }
 219
 220         elog(ERROR, "bogus ContainsOids value: %d", opt);
 221         return false;                           /* keep compiler quiet */
 222 }
 223
 224 /*
 225  * Extract all not-in-common columns from column lists of a source table
 226  */
 227 static void
 228 extractRemainingColumns(List *common_colnames,
 229                                                 List *src_colnames, List *src_colvars,
 230                                                 List **res_colnames, List **res_colvars)
 231 {
 232         List       *new_colnames = NIL;
 233         List       *new_colvars = NIL;
 234         ListCell   *lnames, *lvars;
 235
 236         Assert(list_length(src_colnames) == list_length(src_colvars));
 237
 238         forboth(lnames, src_colnames, lvars, src_colvars)
 239         {
 240                 char       *colname = strVal(lfirst(lnames));
 241                 bool            match = false;
 242                 ListCell   *cnames;
 243
 244                 foreach(cnames, common_colnames)
 245                 {
 246                         char       *ccolname = strVal(lfirst(cnames));
 247
 248                         if (strcmp(colname, ccolname) == 0)
 249                         {
 250                                 match = true;
 251                                 break;
 252                         }
 253                 }
 254
 255                 if (!match)
 256                 {
 257                         new_colnames = lappend(new_colnames, lfirst(lnames));
 258                         new_colvars = lappend(new_colvars, lfirst(lvars));
 259                 }
 260         }
 261
 262         *res_colnames = new_colnames;
 263         *res_colvars = new_colvars;
 264 }
 265
 266 /* transformJoinUsingClause()
 267  *        Build a complete ON clause from a partially-transformed USING list.
 268  *        We are given lists of nodes representing left and right match columns.
 269  *        Result is a transformed qualification expression.
 270  */
 271 static Node *
 272 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
 273 {
 274         Node       *result = NULL;
 275         ListCell   *lvars, *rvars;
 276
 277         /*
 278          * We cheat a little bit here by building an untransformed operator
 279          * tree whose leaves are the already-transformed Vars.  This is OK
 280          * because transformExpr() won't complain about already-transformed
 281          * subnodes.
 282          */
 283         forboth(lvars, leftVars, rvars, rightVars)
 284         {
 285                 Node       *lvar = (Node *) lfirst(lvars);
 286                 Node       *rvar = (Node *) lfirst(rvars);
 287                 A_Expr     *e;
 288
 289                 e = makeSimpleA_Expr(AEXPR_OP, "=", copyObject(lvar), copyObject(rvar));
 290
 291                 if (result == NULL)
 292                         result = (Node *) e;
 293                 else
 294                 {
 295                         A_Expr     *a;
 296
 297                         a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e);
 298                         result = (Node *) a;
 299                 }
 300         }
 301
 302         /*
 303          * Since the references are already Vars, and are certainly from the
 304          * input relations, we don't have to go through the same pushups that
 305          * transformJoinOnClause() does.  Just invoke transformExpr() to fix
 306          * up the operators, and we're done.
 307          */
 308         result = transformExpr(pstate, result);
 309
 310         result = coerce_to_boolean(pstate, result, "JOIN/USING");
 311
 312         return result;
 313 }
 314
 315 /* transformJoinOnClause()
 316  *        Transform the qual conditions for JOIN/ON.
 317  *        Result is a transformed qualification expression.
 318  */
 319 static Node *
 320 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
 321                                           List *containedRels)
 322 {
 323         Node       *result;
 324         List       *save_namespace;
 325         Relids          clause_varnos;
 326         int                     varno;
 327
 328         /*
 329          * This is a tad tricky, for two reasons.  First, the namespace that
 330          * the join expression should see is just the two subtrees of the JOIN
 331          * plus any outer references from upper pstate levels.  So,
 332          * temporarily set this pstate's namespace accordingly.  (We need not
 333          * check for refname conflicts, because transformFromClauseItem()
 334          * already did.) NOTE: this code is OK only because the ON clause
 335          * can't legally alter the namespace by causing implicit relation refs
 336          * to be added.
 337          */
 338         save_namespace = pstate->p_namespace;
 339         pstate->p_namespace = list_make2(j->larg, j->rarg);
 340
 341         result = transformWhereClause(pstate, j->quals, "JOIN/ON");
 342
 343         pstate->p_namespace = save_namespace;
 344
 345         /*
 346          * Second, we need to check that the ON condition doesn't refer to any
 347          * rels outside the input subtrees of the JOIN.  It could do that
 348          * despite our hack on the namespace if it uses fully-qualified names.
 349          * So, grovel through the transformed clause and make sure there are
 350          * no bogus references.  (Outer references are OK, and are ignored
 351          * here.)
 352          */
 353         clause_varnos = pull_varnos(result);
 354         while ((varno = bms_first_member(clause_varnos)) >= 0)
 355         {
 356                 if (!list_member_int(containedRels, varno))
 357                 {
 358                         ereport(ERROR,
 359                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 360                                          errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
 361                                    rt_fetch(varno, pstate->p_rtable)->eref->aliasname)));
 362                 }
 363         }
 364         bms_free(clause_varnos);
 365
 366         return result;
 367 }
 368
 369 /*
 370  * transformTableEntry --- transform a RangeVar (simple relation reference)
 371  */
 372 static RangeTblRef *
 373 transformTableEntry(ParseState *pstate, RangeVar *r)
 374 {
 375         RangeTblEntry *rte;
 376         RangeTblRef *rtr;
 377
 378         /*
 379          * mark this entry to indicate it comes from the FROM clause. In SQL,
 380          * the target list can only refer to range variables specified in the
 381          * from clause but we follow the more powerful POSTQUEL semantics and
 382          * automatically generate the range variable if not specified. However
 383          * there are times we need to know whether the entries are legitimate.
 384          */
 385         rte = addRangeTableEntry(pstate, r, r->alias,
 386                                                          interpretInhOption(r->inhOpt), true);
 387
 388         /*
 389          * We create a RangeTblRef, but we do not add it to the joinlist or
 390          * namespace; our caller must do that if appropriate.
 391          */
 392         rtr = makeNode(RangeTblRef);
 393         /* assume new rte is at end */
 394         rtr->rtindex = list_length(pstate->p_rtable);
 395         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
 396
 397         return rtr;
 398 }
 399
 400
 401 /*
 402  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
 403  */
 404 static RangeTblRef *
 405 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
 406 {
 407         List       *parsetrees;
 408         Query      *query;
 409         RangeTblEntry *rte;
 410         RangeTblRef *rtr;
 411
 412         /*
 413          * We require user to supply an alias for a subselect, per SQL92. To
 414          * relax this, we'd have to be prepared to gin up a unique alias for
 415          * an unlabeled subselect.
 416          */
 417         if (r->alias == NULL)
 418                 ereport(ERROR,
 419                                 (errcode(ERRCODE_SYNTAX_ERROR),
 420                                  errmsg("subquery in FROM must have an alias")));
 421
 422         /*
 423          * Analyze and transform the subquery.
 424          */
 425         parsetrees = parse_sub_analyze(r->subquery, pstate);
 426
 427         /*
 428          * Check that we got something reasonable.      Most of these conditions
 429          * are probably impossible given restrictions of the grammar, but
 430          * check 'em anyway.
 431          */
 432         if (list_length(parsetrees) != 1)
 433                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 434         query = (Query *) linitial(parsetrees);
 435         if (query == NULL || !IsA(query, Query))
 436                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 437
 438         if (query->commandType != CMD_SELECT)
 439                 elog(ERROR, "expected SELECT query from subquery in FROM");
 440         if (query->resultRelation != 0 || query->into != NULL)
 441                 ereport(ERROR,
 442                                 (errcode(ERRCODE_SYNTAX_ERROR),
 443                                  errmsg("subquery in FROM may not have SELECT INTO")));
 444
 445         /*
 446          * The subquery cannot make use of any variables from FROM items
 447          * created earlier in the current query.  Per SQL92, the scope of a
 448          * FROM item does not include other FROM items.  Formerly we hacked
 449          * the namespace so that the other variables weren't even visible, but
 450          * it seems more useful to leave them visible and give a specific
 451          * error message.
 452          *
 453          * XXX this will need further work to support SQL99's LATERAL() feature,
 454          * wherein such references would indeed be legal.
 455          *
 456          * We can skip groveling through the subquery if there's not anything
 457          * visible in the current query.  Also note that outer references are
 458          * OK.
 459          */
 460         if (pstate->p_namespace)
 461         {
 462                 if (contain_vars_of_level((Node *) query, 1))
 463                         ereport(ERROR,
 464                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 465                                          errmsg("subquery in FROM may not refer to other relations of same query level")));
 466         }
 467
 468         /*
 469          * OK, build an RTE for the subquery.
 470          */
 471         rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
 472
 473         /*
 474          * We create a RangeTblRef, but we do not add it to the joinlist or
 475          * namespace; our caller must do that if appropriate.
 476          */
 477         rtr = makeNode(RangeTblRef);
 478         /* assume new rte is at end */
 479         rtr->rtindex = list_length(pstate->p_rtable);
 480         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
 481
 482         return rtr;
 483 }
 484
 485
 486 /*
 487  * transformRangeFunction --- transform a function call appearing in FROM
 488  */
 489 static RangeTblRef *
 490 transformRangeFunction(ParseState *pstate, RangeFunction *r)
 491 {
 492         Node       *funcexpr;
 493         char       *funcname;
 494         RangeTblEntry *rte;
 495         RangeTblRef *rtr;
 496
 497         /* Get function name for possible use as alias */
 498         Assert(IsA(r->funccallnode, FuncCall));
 499         funcname = strVal(llast(((FuncCall *) r->funccallnode)->funcname));
 500
 501         /*
 502          * Transform the raw FuncCall node.
 503          */
 504         funcexpr = transformExpr(pstate, r->funccallnode);
 505
 506         /*
 507          * The function parameters cannot make use of any variables from other
 508          * FROM items.  (Compare to transformRangeSubselect(); the coding is
 509          * different though because we didn't parse as a sub-select with its
 510          * own level of namespace.)
 511          *
 512          * XXX this will need further work to support SQL99's LATERAL() feature,
 513          * wherein such references would indeed be legal.
 514          */
 515         if (pstate->p_namespace)
 516         {
 517                 if (contain_vars_of_level(funcexpr, 0))
 518                         ereport(ERROR,
 519                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 520                                          errmsg("function expression in FROM may not refer to other relations of same query level")));
 521         }
 522
 523         /*
 524          * Disallow aggregate functions in the expression.      (No reason to
 525          * postpone this check until parseCheckAggregates.)
 526          */
 527         if (pstate->p_hasAggs)
 528         {
 529                 if (checkExprHasAggs(funcexpr))
 530                         ereport(ERROR,
 531                                         (errcode(ERRCODE_GROUPING_ERROR),
 532                                          errmsg("cannot use aggregate function in function expression in FROM")));
 533         }
 534
 535         /*
 536          * If a coldeflist is supplied, ensure it defines a legal set of names
 537          * (no duplicates) and datatypes (no pseudo-types, for instance).
 538          */
 539         if (r->coldeflist)
 540         {
 541                 TupleDesc       tupdesc;
 542
 543                 tupdesc = BuildDescForRelation(r->coldeflist);
 544                 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
 545         }
 546
 547         /*
 548          * OK, build an RTE for the function.
 549          */
 550         rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
 551                                                                                 r, true);
 552
 553         /*
 554          * We create a RangeTblRef, but we do not add it to the joinlist or
 555          * namespace; our caller must do that if appropriate.
 556          */
 557         rtr = makeNode(RangeTblRef);
 558         /* assume new rte is at end */
 559         rtr->rtindex = list_length(pstate->p_rtable);
 560         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
 561
 562         return rtr;
 563 }
 564
 565
 566 /*
 567  * transformFromClauseItem -
 568  *        Transform a FROM-clause item, adding any required entries to the
 569  *        range table list being built in the ParseState, and return the
 570  *        transformed item ready to include in the joinlist and namespace.
 571  *        This routine can recurse to handle SQL92 JOIN expressions.
 572  *
 573  *        Aside from the primary return value (the transformed joinlist item)
 574  *        this routine also returns an integer list of the rangetable indexes
 575  *        of all the base and join relations represented in the joinlist item.
 576  *        This list is needed for checking JOIN/ON conditions in higher levels.
 577  */
 578 static Node *
 579 transformFromClauseItem(ParseState *pstate, Node *n, List **containedRels)
 580 {
 581         if (IsA(n, RangeVar))
 582         {
 583                 /* Plain relation reference */
 584                 RangeTblRef *rtr;
 585
 586                 rtr = transformTableEntry(pstate, (RangeVar *) n);
 587                 *containedRels = list_make1_int(rtr->rtindex);
 588                 return (Node *) rtr;
 589         }
 590         else if (IsA(n, RangeSubselect))
 591         {
 592                 /* sub-SELECT is like a plain relation */
 593                 RangeTblRef *rtr;
 594
 595                 rtr = transformRangeSubselect(pstate, (RangeSubselect *) n);
 596                 *containedRels = list_make1_int(rtr->rtindex);
 597                 return (Node *) rtr;
 598         }
 599         else if (IsA(n, RangeFunction))
 600         {
 601                 /* function is like a plain relation */
 602                 RangeTblRef *rtr;
 603
 604                 rtr = transformRangeFunction(pstate, (RangeFunction *) n);
 605                 *containedRels = list_make1_int(rtr->rtindex);
 606                 return (Node *) rtr;
 607         }
 608         else if (IsA(n, JoinExpr))
 609         {
 610                 /* A newfangled join expression */
 611                 JoinExpr   *j = (JoinExpr *) n;
 612                 List       *my_containedRels,
 613                                    *l_containedRels,
 614                                    *r_containedRels,
 615                                    *l_colnames,
 616                                    *r_colnames,
 617                                    *res_colnames,
 618                                    *l_colvars,
 619                                    *r_colvars,
 620                                    *res_colvars;
 621                 Index           leftrti,
 622                                         rightrti;
 623                 RangeTblEntry *rte;
 624
 625                 /*
 626                  * Recursively process the left and right subtrees
 627                  */
 628                 j->larg = transformFromClauseItem(pstate, j->larg, &l_containedRels);
 629                 j->rarg = transformFromClauseItem(pstate, j->rarg, &r_containedRels);
 630
 631                 /*
 632                  * Generate combined list of relation indexes for possible use by
 633                  * transformJoinOnClause below.
 634                  */
 635                 my_containedRels = list_concat(l_containedRels, r_containedRels);
 636
 637                 /*
 638                  * Check for conflicting refnames in left and right subtrees. Must
 639                  * do this because higher levels will assume I hand back a self-
 640                  * consistent namespace subtree.
 641                  */
 642                 checkNameSpaceConflicts(pstate, j->larg, j->rarg);
 643
 644                 /*
 645                  * Extract column name and var lists from both subtrees
 646                  *
 647                  * Note: expandRTE returns new lists, safe for me to modify
 648                  */
 649                 if (IsA(j->larg, RangeTblRef))
 650                         leftrti = ((RangeTblRef *) j->larg)->rtindex;
 651                 else if (IsA(j->larg, JoinExpr))
 652                         leftrti = ((JoinExpr *) j->larg)->rtindex;
 653                 else
 654                 {
 655                         elog(ERROR, "unrecognized node type: %d", (int) nodeTag(j->larg));
 656                         leftrti = 0;            /* keep compiler quiet */
 657                 }
 658                 rte = rt_fetch(leftrti, pstate->p_rtable);
 659                 expandRTE(pstate, rte, &l_colnames, &l_colvars);
 660
 661                 if (IsA(j->rarg, RangeTblRef))
 662                         rightrti = ((RangeTblRef *) j->rarg)->rtindex;
 663                 else if (IsA(j->rarg, JoinExpr))
 664                         rightrti = ((JoinExpr *) j->rarg)->rtindex;
 665                 else
 666                 {
 667                         elog(ERROR, "unrecognized node type: %d", (int) nodeTag(j->rarg));
 668                         rightrti = 0;           /* keep compiler quiet */
 669                 }
 670                 rte = rt_fetch(rightrti, pstate->p_rtable);
 671                 expandRTE(pstate, rte, &r_colnames, &r_colvars);
 672
 673                 /*
 674                  * Natural join does not explicitly specify columns; must generate
 675                  * columns to join. Need to run through the list of columns from
 676                  * each table or join result and match up the column names. Use
 677                  * the first table, and check every column in the second table for
 678                  * a match.  (We'll check that the matches were unique later on.)
 679                  * The result of this step is a list of column names just like an
 680                  * explicitly-written USING list.
 681                  */
 682                 if (j->isNatural)
 683                 {
 684                         List       *rlist = NIL;
 685                         ListCell   *lx,
 686                                            *rx;
 687
 688                         Assert(j->using == NIL);        /* shouldn't have USING() too */
 689
 690                         foreach(lx, l_colnames)
 691                         {
 692                                 char       *l_colname = strVal(lfirst(lx));
 693                                 Value      *m_name = NULL;
 694
 695                                 foreach(rx, r_colnames)
 696                                 {
 697                                         char       *r_colname = strVal(lfirst(rx));
 698
 699                                         if (strcmp(l_colname, r_colname) == 0)
 700                                         {
 701                                                 m_name = makeString(l_colname);
 702                                                 break;
 703                                         }
 704                                 }
 705
 706                                 /* matched a right column? then keep as join column... */
 707                                 if (m_name != NULL)
 708                                         rlist = lappend(rlist, m_name);
 709                         }
 710
 711                         j->using = rlist;
 712                 }
 713
 714                 /*
 715                  * Now transform the join qualifications, if any.
 716                  */
 717                 res_colnames = NIL;
 718                 res_colvars = NIL;
 719
 720                 if (j->using)
 721                 {
 722                         /*
 723                          * JOIN/USING (or NATURAL JOIN, as transformed above).
 724                          * Transform the list into an explicit ON-condition, and
 725                          * generate a list of merged result columns.
 726                          */
 727                         List       *ucols = j->using;
 728                         List       *l_usingvars = NIL;
 729                         List       *r_usingvars = NIL;
 730                         ListCell   *ucol;
 731
 732                         Assert(j->quals == NULL);       /* shouldn't have ON() too */
 733
 734                         foreach(ucol, ucols)
 735                         {
 736                                 char       *u_colname = strVal(lfirst(ucol));
 737                                 ListCell   *col;
 738                                 int                     ndx;
 739                                 int                     l_index = -1;
 740                                 int                     r_index = -1;
 741                                 Var                *l_colvar,
 742                                                    *r_colvar;
 743
 744                                 /* Check for USING(foo,foo) */
 745                                 foreach(col, res_colnames)
 746                                 {
 747                                         char       *res_colname = strVal(lfirst(col));
 748
 749                                         if (strcmp(res_colname, u_colname) == 0)
 750                                                 ereport(ERROR,
 751                                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
 752                                                                  errmsg("column name \"%s\" appears more than once in USING clause",
 753                                                                                 u_colname)));
 754                                 }
 755
 756                                 /* Find it in left input */
 757                                 ndx = 0;
 758                                 foreach(col, l_colnames)
 759                                 {
 760                                         char       *l_colname = strVal(lfirst(col));
 761
 762                                         if (strcmp(l_colname, u_colname) == 0)
 763                                         {
 764                                                 if (l_index >= 0)
 765                                                         ereport(ERROR,
 766                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 767                                                                          errmsg("common column name \"%s\" appears more than once in left table",
 768                                                                                         u_colname)));
 769                                                 l_index = ndx;
 770                                         }
 771                                         ndx++;
 772                                 }
 773                                 if (l_index < 0)
 774                                         ereport(ERROR,
 775                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 776                                                          errmsg("column \"%s\" specified in USING clause does not exist in left table",
 777                                                                         u_colname)));
 778
 779                                 /* Find it in right input */
 780                                 ndx = 0;
 781                                 foreach(col, r_colnames)
 782                                 {
 783                                         char       *r_colname = strVal(lfirst(col));
 784
 785                                         if (strcmp(r_colname, u_colname) == 0)
 786                                         {
 787                                                 if (r_index >= 0)
 788                                                         ereport(ERROR,
 789                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 790                                                                          errmsg("common column name \"%s\" appears more than once in right table",
 791                                                                                         u_colname)));
 792                                                 r_index = ndx;
 793                                         }
 794                                         ndx++;
 795                                 }
 796                                 if (r_index < 0)
 797                                         ereport(ERROR,
 798                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 799                                                          errmsg("column \"%s\" specified in USING clause does not exist in right table",
 800                                                                         u_colname)));
 801
 802                                 l_colvar = list_nth(l_colvars, l_index);
 803                                 l_usingvars = lappend(l_usingvars, l_colvar);
 804                                 r_colvar = list_nth(r_colvars, r_index);
 805                                 r_usingvars = lappend(r_usingvars, r_colvar);
 806
 807                                 res_colnames = lappend(res_colnames, lfirst(ucol));
 808                                 res_colvars = lappend(res_colvars,
 809                                                                           buildMergedJoinVar(pstate,
 810                                                                                                                  j->jointype,
 811                                                                                                                  l_colvar,
 812                                                                                                                  r_colvar));
 813                         }
 814
 815                         j->quals = transformJoinUsingClause(pstate,
 816                                                                                                 l_usingvars,
 817                                                                                                 r_usingvars);
 818                 }
 819                 else if (j->quals)
 820                 {
 821                         /* User-written ON-condition; transform it */
 822                         j->quals = transformJoinOnClause(pstate, j, my_containedRels);
 823                 }
 824                 else
 825                 {
 826                         /* CROSS JOIN: no quals */
 827                 }
 828
 829                 /* Add remaining columns from each side to the output columns */
 830                 extractRemainingColumns(res_colnames,
 831                                                                 l_colnames, l_colvars,
 832                                                                 &l_colnames, &l_colvars);
 833                 extractRemainingColumns(res_colnames,
 834                                                                 r_colnames, r_colvars,
 835                                                                 &r_colnames, &r_colvars);
 836                 res_colnames = list_concat(res_colnames, l_colnames);
 837                 res_colvars = list_concat(res_colvars, l_colvars);
 838                 res_colnames = list_concat(res_colnames, r_colnames);
 839                 res_colvars = list_concat(res_colvars, r_colvars);
 840
 841                 /*
 842                  * Check alias (AS clause), if any.
 843                  */
 844                 if (j->alias)
 845                 {
 846                         if (j->alias->colnames != NIL)
 847                         {
 848                                 if (list_length(j->alias->colnames) > list_length(res_colnames))
 849                                         ereport(ERROR,
 850                                                         (errcode(ERRCODE_SYNTAX_ERROR),
 851                                                          errmsg("column alias list for \"%s\" has too many entries",
 852                                                                         j->alias->aliasname)));
 853                         }
 854                 }
 855
 856                 /*
 857                  * Now build an RTE for the result of the join
 858                  */
 859                 rte = addRangeTableEntryForJoin(pstate,
 860                                                                                 res_colnames,
 861                                                                                 j->jointype,
 862                                                                                 res_colvars,
 863                                                                                 j->alias,
 864                                                                                 true);
 865
 866                 /* assume new rte is at end */
 867                 j->rtindex = list_length(pstate->p_rtable);
 868                 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
 869
 870                 /*
 871                  * Include join RTE in returned containedRels list
 872                  */
 873                 *containedRels = lcons_int(j->rtindex, my_containedRels);
 874
 875                 return (Node *) j;
 876         }
 877         else
 878                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
 879         return NULL;                            /* can't get here, keep compiler quiet */
 880 }
 881
 882 /*
 883  * buildMergedJoinVar -
 884  *        generate a suitable replacement expression for a merged join column
 885  */
 886 static Node *
 887 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 888                                    Var *l_colvar, Var *r_colvar)
 889 {
 890         Oid                     outcoltype;
 891         int32           outcoltypmod;
 892         Node       *l_node,
 893                            *r_node,
 894                            *res_node;
 895
 896         /*
 897          * Choose output type if input types are dissimilar.
 898          */
 899         outcoltype = l_colvar->vartype;
 900         outcoltypmod = l_colvar->vartypmod;
 901         if (outcoltype != r_colvar->vartype)
 902         {
 903                 outcoltype = select_common_type(list_make2_oid(l_colvar->vartype,
 904                                                                                                            r_colvar->vartype),
 905                                                                                 "JOIN/USING");
 906                 outcoltypmod = -1;              /* ie, unknown */
 907         }
 908         else if (outcoltypmod != r_colvar->vartypmod)
 909         {
 910                 /* same type, but not same typmod */
 911                 outcoltypmod = -1;              /* ie, unknown */
 912         }
 913
 914         /*
 915          * Insert coercion functions if needed.  Note that a difference in
 916          * typmod can only happen if input has typmod but outcoltypmod is -1.
 917          * In that case we insert a RelabelType to clearly mark that result's
 918          * typmod is not same as input.  We never need coerce_type_typmod.
 919          */
 920         if (l_colvar->vartype != outcoltype)
 921                 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
 922                                                          outcoltype, outcoltypmod,
 923                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 924         else if (l_colvar->vartypmod != outcoltypmod)
 925                 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
 926                                                                                   outcoltype, outcoltypmod,
 927                                                                                   COERCE_IMPLICIT_CAST);
 928         else
 929                 l_node = (Node *) l_colvar;
 930
 931         if (r_colvar->vartype != outcoltype)
 932                 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
 933                                                          outcoltype, outcoltypmod,
 934                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 935         else if (r_colvar->vartypmod != outcoltypmod)
 936                 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
 937                                                                                   outcoltype, outcoltypmod,
 938                                                                                   COERCE_IMPLICIT_CAST);
 939         else
 940                 r_node = (Node *) r_colvar;
 941
 942         /*
 943          * Choose what to emit
 944          */
 945         switch (jointype)
 946         {
 947                 case JOIN_INNER:
 948
 949                         /*
 950                          * We can use either var; prefer non-coerced one if available.
 951                          */
 952                         if (IsA(l_node, Var))
 953                                 res_node = l_node;
 954                         else if (IsA(r_node, Var))
 955                                 res_node = r_node;
 956                         else
 957                                 res_node = l_node;
 958                         break;
 959                 case JOIN_LEFT:
 960                         /* Always use left var */
 961                         res_node = l_node;
 962                         break;
 963                 case JOIN_RIGHT:
 964                         /* Always use right var */
 965                         res_node = r_node;
 966                         break;
 967                 case JOIN_FULL:
 968                         {
 969                                 /*
 970                                  * Here we must build a COALESCE expression to ensure that
 971                                  * the join output is non-null if either input is.
 972                                  */
 973                                 CoalesceExpr *c = makeNode(CoalesceExpr);
 974
 975                                 c->coalescetype = outcoltype;
 976                                 c->args = list_make2(l_node, r_node);
 977                                 res_node = (Node *) c;
 978                                 break;
 979                         }
 980                 default:
 981                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
 982                         res_node = NULL;        /* keep compiler quiet */
 983                         break;
 984         }
 985
 986         return res_node;
 987 }
 988
 989
 990 /*
 991  * transformWhereClause -
 992  *        Transform the qualification and make sure it is of type boolean.
 993  *        Used for WHERE and allied clauses.
 994  *
 995  * constructName does not affect the semantics, but is used in error messages
 996  */
 997 Node *
 998 transformWhereClause(ParseState *pstate, Node *clause,
 999                                          const char *constructName)
1000 {
1001         Node       *qual;
1002
1003         if (clause == NULL)
1004                 return NULL;
1005
1006         qual = transformExpr(pstate, clause);
1007
1008         qual = coerce_to_boolean(pstate, qual, constructName);
1009
1010         return qual;
1011 }
1012
1013
1014 /*
1015  * transformLimitClause -
1016  *        Transform the expression and make sure it is of type integer.
1017  *        Used for LIMIT and allied clauses.
1018  *
1019  * constructName does not affect the semantics, but is used in error messages
1020  */
1021 Node *
1022 transformLimitClause(ParseState *pstate, Node *clause,
1023                                          const char *constructName)
1024 {
1025         Node       *qual;
1026
1027         if (clause == NULL)
1028                 return NULL;
1029
1030         qual = transformExpr(pstate, clause);
1031
1032         qual = coerce_to_integer(pstate, qual, constructName);
1033
1034         /*
1035          * LIMIT can't refer to any vars or aggregates of the current query;
1036          * we don't allow subselects either (though that case would at least
1037          * be sensible)
1038          */
1039         if (contain_vars_of_level(qual, 0))
1040         {
1041                 ereport(ERROR,
1042                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1043                 /* translator: %s is name of a SQL construct, eg LIMIT */
1044                                  errmsg("argument of %s must not contain variables",
1045                                                 constructName)));
1046         }
1047         if (checkExprHasAggs(qual))
1048         {
1049                 ereport(ERROR,
1050                                 (errcode(ERRCODE_GROUPING_ERROR),
1051                 /* translator: %s is name of a SQL construct, eg LIMIT */
1052                                  errmsg("argument of %s must not contain aggregates",
1053                                                 constructName)));
1054         }
1055         if (contain_subplans(qual))
1056         {
1057                 ereport(ERROR,
1058                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1059                 /* translator: %s is name of a SQL construct, eg LIMIT */
1060                                  errmsg("argument of %s must not contain subqueries",
1061                                                 constructName)));
1062         }
1063
1064         return qual;
1065 }
1066
1067
1068 /*
1069  *      findTargetlistEntry -
1070  *        Returns the targetlist entry matching the given (untransformed) node.
1071  *        If no matching entry exists, one is created and appended to the target
1072  *        list as a "resjunk" node.
1073  *
1074  * node         the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1075  * tlist        the target list (passed by reference so we can append to it)
1076  * clause       identifies clause type being processed
1077  */
1078 static TargetEntry *
1079 findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
1080 {
1081         TargetEntry *target_result = NULL;
1082         ListCell   *tl;
1083         Node       *expr;
1084
1085         /*----------
1086          * Handle two special cases as mandated by the SQL92 spec:
1087          *
1088          * 1. Bare ColumnName (no qualifier or subscripts)
1089          *        For a bare identifier, we search for a matching column name
1090          *        in the existing target list.  Multiple matches are an error
1091          *        unless they refer to identical values; for example,
1092          *        we allow      SELECT a, a FROM table ORDER BY a
1093          *        but not       SELECT a AS b, b FROM table ORDER BY b
1094          *        If no match is found, we fall through and treat the identifier
1095          *        as an expression.
1096          *        For GROUP BY, it is incorrect to match the grouping item against
1097          *        targetlist entries: according to SQL92, an identifier in GROUP BY
1098          *        is a reference to a column name exposed by FROM, not to a target
1099          *        list column.  However, many implementations (including pre-7.0
1100          *        PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1101          *        to see if the identifier matches any FROM column name, and only
1102          *        try for a targetlist name if it doesn't.  This ensures that we
1103          *        adhere to the spec in the case where the name could be both.
1104          *        DISTINCT ON isn't in the standard, so we can do what we like there;
1105          *        we choose to make it work like ORDER BY, on the rather flimsy
1106          *        grounds that ordinary DISTINCT works on targetlist entries.
1107          *
1108          * 2. IntegerConstant
1109          *        This means to use the n'th item in the existing target list.
1110          *        Note that it would make no sense to order/group/distinct by an
1111          *        actual constant, so this does not create a conflict with our
1112          *        extension to order/group by an expression.
1113          *        GROUP BY column-number is not allowed by SQL92, but since
1114          *        the standard has no other behavior defined for this syntax,
1115          *        we may as well accept this common extension.
1116          *
1117          * Note that pre-existing resjunk targets must not be used in either case,
1118          * since the user didn't write them in his SELECT list.
1119          *
1120          * If neither special case applies, fall through to treat the item as
1121          * an expression.
1122          *----------
1123          */
1124         if (IsA(node, ColumnRef) &&
1125                 list_length(((ColumnRef *) node)->fields) == 1)
1126         {
1127                 char       *name = strVal(linitial(((ColumnRef *) node)->fields));
1128
1129                 if (clause == GROUP_CLAUSE)
1130                 {
1131                         /*
1132                          * In GROUP BY, we must prefer a match against a FROM-clause
1133                          * column to one against the targetlist.  Look to see if there
1134                          * is a matching column.  If so, fall through to let
1135                          * transformExpr() do the rest.  NOTE: if name could refer
1136                          * ambiguously to more than one column name exposed by FROM,
1137                          * colNameToVar will ereport(ERROR).  That's just what we want
1138                          * here.
1139                          *
1140                          * Small tweak for 7.4.3: ignore matches in upper query levels.
1141                          * This effectively changes the search order for bare names to
1142                          * (1) local FROM variables, (2) local targetlist aliases,
1143                          * (3) outer FROM variables, whereas before it was (1) (3) (2).
1144                          * SQL92 and SQL99 do not allow GROUPing BY an outer reference,
1145                          * so this breaks no cases that are legal per spec, and it
1146                          * seems a more self-consistent behavior.
1147                          */
1148                         if (colNameToVar(pstate, name, true) != NULL)
1149                                 name = NULL;
1150                 }
1151
1152                 if (name != NULL)
1153                 {
1154                         foreach(tl, *tlist)
1155                         {
1156                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1157                                 Resdom     *resnode = tle->resdom;
1158
1159                                 if (!resnode->resjunk &&
1160                                         strcmp(resnode->resname, name) == 0)
1161                                 {
1162                                         if (target_result != NULL)
1163                                         {
1164                                                 if (!equal(target_result->expr, tle->expr))
1165                                                         ereport(ERROR,
1166                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1167                                                                          /* translator: first %s is name of a SQL construct, eg ORDER BY */
1168                                                                          errmsg("%s \"%s\" is ambiguous",
1169                                                                                         clauseText[clause], name)));
1170                                         }
1171                                         else
1172                                                 target_result = tle;
1173                                         /* Stay in loop to check for ambiguity */
1174                                 }
1175                         }
1176                         if (target_result != NULL)
1177                                 return target_result;   /* return the first match */
1178                 }
1179         }
1180         if (IsA(node, A_Const))
1181         {
1182                 Value      *val = &((A_Const *) node)->val;
1183                 int                     targetlist_pos = 0;
1184                 int                     target_pos;
1185
1186                 if (!IsA(val, Integer))
1187                         ereport(ERROR,
1188                                         (errcode(ERRCODE_SYNTAX_ERROR),
1189                         /* translator: %s is name of a SQL construct, eg ORDER BY */
1190                                          errmsg("non-integer constant in %s",
1191                                                         clauseText[clause])));
1192                 target_pos = intVal(val);
1193                 foreach(tl, *tlist)
1194                 {
1195                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1196                         Resdom     *resnode = tle->resdom;
1197
1198                         if (!resnode->resjunk)
1199                         {
1200                                 if (++targetlist_pos == target_pos)
1201                                         return tle; /* return the unique match */
1202                         }
1203                 }
1204                 ereport(ERROR,
1205                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1206                 /* translator: %s is name of a SQL construct, eg ORDER BY */
1207                                  errmsg("%s position %d is not in select list",
1208                                                 clauseText[clause], target_pos)));
1209         }
1210
1211         /*
1212          * Otherwise, we have an expression (this is a Postgres extension not
1213          * found in SQL92).  Convert the untransformed node to a transformed
1214          * expression, and search for a match in the tlist. NOTE: it doesn't
1215          * really matter whether there is more than one match.  Also, we are
1216          * willing to match a resjunk target here, though the above cases must
1217          * ignore resjunk targets.
1218          */
1219         expr = transformExpr(pstate, node);
1220
1221         foreach(tl, *tlist)
1222         {
1223                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1224
1225                 if (equal(expr, tle->expr))
1226                         return tle;
1227         }
1228
1229         /*
1230          * If no matches, construct a new target entry which is appended to
1231          * the end of the target list.  This target is given resjunk = TRUE so
1232          * that it will not be projected into the final tuple.
1233          */
1234         target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1235
1236         *tlist = lappend(*tlist, target_result);
1237
1238         return target_result;
1239 }
1240
1241
1242 /*
1243  * transformGroupClause -
1244  *        transform a GROUP BY clause
1245  *
1246  * GROUP BY items will be added to the targetlist (as resjunk columns)
1247  * if not already present, so the targetlist must be passed by reference.
1248  */
1249 List *
1250 transformGroupClause(ParseState *pstate, List *grouplist,
1251                                          List **targetlist, List *sortClause)
1252 {
1253         List       *glist = NIL;
1254         ListCell   *gl;
1255         ListCell   *sortItem;
1256
1257         sortItem = list_head(sortClause);
1258
1259         foreach(gl, grouplist)
1260         {
1261                 TargetEntry *tle;
1262                 Oid                     restype;
1263                 Oid                     ordering_op;
1264                 GroupClause *grpcl;
1265
1266                 tle = findTargetlistEntry(pstate, lfirst(gl),
1267                                                                   targetlist, GROUP_CLAUSE);
1268
1269                 /* avoid making duplicate grouplist entries */
1270                 if (targetIsInSortList(tle, glist))
1271                         continue;
1272
1273                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1274                 restype = tle->resdom->restype;
1275
1276                 if (restype == UNKNOWNOID)
1277                 {
1278                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1279                                                                                          restype, TEXTOID, -1,
1280                                                                                          COERCION_IMPLICIT,
1281                                                                                          COERCE_IMPLICIT_CAST);
1282                         restype = tle->resdom->restype = TEXTOID;
1283                         tle->resdom->restypmod = -1;
1284                 }
1285
1286                 /*
1287                  * If the GROUP BY clause matches the ORDER BY clause, we want to
1288                  * adopt the ordering operators from the latter rather than using
1289                  * the default ops.  This allows "GROUP BY foo ORDER BY foo DESC"
1290                  * to be done with only one sort step.  Note we are assuming that
1291                  * any user-supplied ordering operator will bring equal values
1292                  * together, which is all that GROUP BY needs.
1293                  */
1294                 if (sortItem &&
1295                         ((SortClause *) lfirst(sortItem))->tleSortGroupRef ==
1296                         tle->resdom->ressortgroupref)
1297                 {
1298                         ordering_op = ((SortClause *) lfirst(sortItem))->sortop;
1299                         sortItem = lnext(sortItem);
1300                 }
1301                 else
1302                 {
1303                         ordering_op = ordering_oper_opid(restype);
1304                         sortItem = NULL;        /* disregard ORDER BY once match fails */
1305                 }
1306
1307                 grpcl = makeNode(GroupClause);
1308                 grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist);
1309                 grpcl->sortop = ordering_op;
1310                 glist = lappend(glist, grpcl);
1311         }
1312
1313         return glist;
1314 }
1315
1316 /*
1317  * transformSortClause -
1318  *        transform an ORDER BY clause
1319  *
1320  * ORDER BY items will be added to the targetlist (as resjunk columns)
1321  * if not already present, so the targetlist must be passed by reference.
1322  */
1323 List *
1324 transformSortClause(ParseState *pstate,
1325                                         List *orderlist,
1326                                         List **targetlist,
1327                                         bool resolveUnknown)
1328 {
1329         List       *sortlist = NIL;
1330         ListCell   *olitem;
1331
1332         foreach(olitem, orderlist)
1333         {
1334                 SortBy     *sortby = lfirst(olitem);
1335                 TargetEntry *tle;
1336
1337                 tle = findTargetlistEntry(pstate, sortby->node,
1338                                                                   targetlist, ORDER_CLAUSE);
1339
1340                 sortlist = addTargetToSortList(pstate, tle,
1341                                                                            sortlist, *targetlist,
1342                                                                            sortby->sortby_kind,
1343                                                                            sortby->useOp,
1344                                                                            resolveUnknown);
1345         }
1346
1347         return sortlist;
1348 }
1349
1350 /*
1351  * transformDistinctClause -
1352  *        transform a DISTINCT or DISTINCT ON clause
1353  *
1354  * Since we may need to add items to the query's sortClause list, that list
1355  * is passed by reference.      Likewise for the targetlist.
1356  */
1357 List *
1358 transformDistinctClause(ParseState *pstate, List *distinctlist,
1359                                                 List **targetlist, List **sortClause)
1360 {
1361         List       *result = NIL;
1362         ListCell   *slitem;
1363         ListCell   *dlitem;
1364
1365         /* No work if there was no DISTINCT clause */
1366         if (distinctlist == NIL)
1367                 return NIL;
1368
1369         if (linitial(distinctlist) == NULL)
1370         {
1371                 /* We had SELECT DISTINCT */
1372
1373                 /*
1374                  * All non-resjunk elements from target list that are not already
1375                  * in the sort list should be added to it.      (We don't really care
1376                  * what order the DISTINCT fields are checked in, so we can leave
1377                  * the user's ORDER BY spec alone, and just add additional sort
1378                  * keys to it to ensure that all targetlist items get sorted.)
1379                  */
1380                 *sortClause = addAllTargetsToSortList(pstate,
1381                                                                                           *sortClause,
1382                                                                                           *targetlist,
1383                                                                                           true);
1384
1385                 /*
1386                  * Now, DISTINCT list consists of all non-resjunk sortlist items.
1387                  * Actually, all the sortlist items had better be non-resjunk!
1388                  * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item
1389                  * that does not appear anywhere in the SELECT targetlist, and we
1390                  * can't implement that with only one sorting pass...
1391                  */
1392                 foreach(slitem, *sortClause)
1393                 {
1394                         SortClause *scl = (SortClause *) lfirst(slitem);
1395                         TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
1396
1397                         if (tle->resdom->resjunk)
1398                                 ereport(ERROR,
1399                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1400                                                  errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
1401                         else
1402                                 result = lappend(result, copyObject(scl));
1403                 }
1404         }
1405         else
1406         {
1407                 /* We had SELECT DISTINCT ON (expr, ...) */
1408
1409                 /*
1410                  * If the user writes both DISTINCT ON and ORDER BY, then the two
1411                  * expression lists must match (until one or the other runs out).
1412                  * Otherwise the ORDER BY requires a different sort order than the
1413                  * DISTINCT does, and we can't implement that with only one sort
1414                  * pass (and if we do two passes, the results will be rather
1415                  * unpredictable). However, it's OK to have more DISTINCT ON
1416                  * expressions than ORDER BY expressions; we can just add the
1417                  * extra DISTINCT values to the sort list, much as we did above
1418                  * for ordinary DISTINCT fields.
1419                  *
1420                  * Actually, it'd be OK for the common prefixes of the two lists to
1421                  * match in any order, but implementing that check seems like more
1422                  * trouble than it's worth.
1423                  */
1424                 ListCell   *nextsortlist = list_head(*sortClause);
1425
1426                 foreach(dlitem, distinctlist)
1427                 {
1428                         TargetEntry *tle;
1429
1430                         tle = findTargetlistEntry(pstate, lfirst(dlitem),
1431                                                                           targetlist, DISTINCT_ON_CLAUSE);
1432
1433                         if (nextsortlist != NULL)
1434                         {
1435                                 SortClause *scl = (SortClause *) lfirst(nextsortlist);
1436
1437                                 if (tle->resdom->ressortgroupref != scl->tleSortGroupRef)
1438                                         ereport(ERROR,
1439                                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1440                                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1441                                 result = lappend(result, copyObject(scl));
1442                                 nextsortlist = lnext(nextsortlist);
1443                         }
1444                         else
1445                         {
1446                                 *sortClause = addTargetToSortList(pstate, tle,
1447                                                                                                   *sortClause, *targetlist,
1448                                                                                                   SORTBY_ASC, NIL, true);
1449
1450                                 /*
1451                                  * Probably, the tle should always have been added at the
1452                                  * end of the sort list ... but search to be safe.
1453                                  */
1454                                 foreach(slitem, *sortClause)
1455                                 {
1456                                         SortClause *scl = (SortClause *) lfirst(slitem);
1457
1458                                         if (tle->resdom->ressortgroupref == scl->tleSortGroupRef)
1459                                         {
1460                                                 result = lappend(result, copyObject(scl));
1461                                                 break;
1462                                         }
1463                                 }
1464                                 if (slitem == NULL)             /* should not happen */
1465                                         elog(ERROR, "failed to add DISTINCT ON clause to target list");
1466                         }
1467                 }
1468         }
1469
1470         return result;
1471 }
1472
1473 /*
1474  * addAllTargetsToSortList
1475  *              Make sure all non-resjunk targets in the targetlist are in the
1476  *              ORDER BY list, adding the not-yet-sorted ones to the end of the list.
1477  *              This is typically used to help implement SELECT DISTINCT.
1478  *
1479  * See addTargetToSortList for info about pstate and resolveUnknown inputs.
1480  *
1481  * Returns the updated ORDER BY list.
1482  */
1483 List *
1484 addAllTargetsToSortList(ParseState *pstate, List *sortlist,
1485                                                 List *targetlist, bool resolveUnknown)
1486 {
1487         ListCell  *l;
1488
1489         foreach(l, targetlist)
1490         {
1491                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1492
1493                 if (!tle->resdom->resjunk)
1494                         sortlist = addTargetToSortList(pstate, tle,
1495                                                                                    sortlist, targetlist,
1496                                                                                    SORTBY_ASC, NIL,
1497                                                                                    resolveUnknown);
1498         }
1499         return sortlist;
1500 }
1501
1502 /*
1503  * addTargetToSortList
1504  *              If the given targetlist entry isn't already in the ORDER BY list,
1505  *              add it to the end of the list, using the sortop with given name
1506  *              or the default sort operator if opname == NIL.
1507  *
1508  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1509  * do nothing (which implies the search for a sort operator will fail).
1510  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1511  * otherwise.
1512  *
1513  * Returns the updated ORDER BY list.
1514  */
1515 List *
1516 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1517                                         List *sortlist, List *targetlist,
1518                                         int sortby_kind, List *sortby_opname,
1519                                         bool resolveUnknown)
1520 {
1521         /* avoid making duplicate sortlist entries */
1522         if (!targetIsInSortList(tle, sortlist))
1523         {
1524                 SortClause *sortcl = makeNode(SortClause);
1525                 Oid                     restype = tle->resdom->restype;
1526
1527                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1528                 if (restype == UNKNOWNOID && resolveUnknown)
1529                 {
1530                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1531                                                                                          restype, TEXTOID, -1,
1532                                                                                          COERCION_IMPLICIT,
1533                                                                                          COERCE_IMPLICIT_CAST);
1534                         restype = tle->resdom->restype = TEXTOID;
1535                         tle->resdom->restypmod = -1;
1536                 }
1537
1538                 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1539
1540                 switch (sortby_kind)
1541                 {
1542                         case SORTBY_ASC:
1543                                 sortcl->sortop = ordering_oper_opid(restype);
1544                                 break;
1545                         case SORTBY_DESC:
1546                                 sortcl->sortop = reverse_ordering_oper_opid(restype);
1547                                 break;
1548                         case SORTBY_USING:
1549                                 Assert(sortby_opname != NIL);
1550                                 sortcl->sortop = compatible_oper_opid(sortby_opname,
1551                                                                                                           restype,
1552                                                                                                           restype,
1553                                                                                                           false);
1554                                 break;
1555                         default:
1556                                 elog(ERROR, "unrecognized sortby_kind: %d", sortby_kind);
1557                                 break;
1558                 }
1559
1560                 sortlist = lappend(sortlist, sortcl);
1561         }
1562         return sortlist;
1563 }
1564
1565 /*
1566  * assignSortGroupRef
1567  *        Assign the targetentry an unused ressortgroupref, if it doesn't
1568  *        already have one.  Return the assigned or pre-existing refnumber.
1569  *
1570  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1571  */
1572 Index
1573 assignSortGroupRef(TargetEntry *tle, List *tlist)
1574 {
1575         Index           maxRef;
1576         ListCell   *l;
1577
1578         if (tle->resdom->ressortgroupref)       /* already has one? */
1579                 return tle->resdom->ressortgroupref;
1580
1581         /* easiest way to pick an unused refnumber: max used + 1 */
1582         maxRef = 0;
1583         foreach(l, tlist)
1584         {
1585                 Index           ref = ((TargetEntry *) lfirst(l))->resdom->ressortgroupref;
1586
1587                 if (ref > maxRef)
1588                         maxRef = ref;
1589         }
1590         tle->resdom->ressortgroupref = maxRef + 1;
1591         return tle->resdom->ressortgroupref;
1592 }
1593
1594 /*
1595  * targetIsInSortList
1596  *              Is the given target item already in the sortlist?
1597  *
1598  * Works for both SortClause and GroupClause lists.  Note that the main
1599  * reason we need this routine (and not just a quick test for nonzeroness
1600  * of ressortgroupref) is that a TLE might be in only one of the lists.
1601  */
1602 bool
1603 targetIsInSortList(TargetEntry *tle, List *sortList)
1604 {
1605         Index           ref = tle->resdom->ressortgroupref;
1606         ListCell   *l;
1607
1608         /* no need to scan list if tle has no marker */
1609         if (ref == 0)
1610                 return false;
1611
1612         foreach(l, sortList)
1613         {
1614                 SortClause *scl = (SortClause *) lfirst(l);
1615
1616                 if (scl->tleSortGroupRef == ref)
1617                         return true;
1618         }
1619         return false;
1620 }