granicus.if.org Git - postgresql/blob - src/backend/parser/parse_clause.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * parse_clause.c
   4  *        handle clauses in parser
   5  *
   6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "access/heapam.h"
  19 #include "catalog/heap.h"
  20 #include "catalog/pg_type.h"
  21 #include "commands/defrem.h"
  22 #include "nodes/makefuncs.h"
  23 #include "optimizer/clauses.h"
  24 #include "optimizer/tlist.h"
  25 #include "optimizer/var.h"
  26 #include "parser/analyze.h"
  27 #include "parser/parsetree.h"
  28 #include "parser/parse_clause.h"
  29 #include "parser/parse_coerce.h"
  30 #include "parser/parse_expr.h"
  31 #include "parser/parse_oper.h"
  32 #include "parser/parse_relation.h"
  33 #include "parser/parse_target.h"
  34 #include "rewrite/rewriteManip.h"
  35 #include "utils/guc.h"
  36 #include "utils/lsyscache.h"
  37 #include "utils/rel.h"
  38
  39
  40 #define ORDER_CLAUSE 0
  41 #define GROUP_CLAUSE 1
  42 #define DISTINCT_ON_CLAUSE 2
  43
  44 static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"};
  45
  46 static void extractRemainingColumns(List *common_colnames,
  47                                                 List *src_colnames, List *src_colvars,
  48                                                 List **res_colnames, List **res_colvars);
  49 static Node *transformJoinUsingClause(ParseState *pstate,
  50                                                  List *leftVars, List *rightVars);
  51 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
  52                                           RangeTblEntry *l_rte,
  53                                           RangeTblEntry *r_rte,
  54                                           List *relnamespace,
  55                                           Relids containedRels);
  56 static RangeTblEntry *transformTableEntry(ParseState *pstate, RangeVar *r);
  57 static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
  58                                                 RangeSubselect *r);
  59 static RangeTblEntry *transformRangeFunction(ParseState *pstate,
  60                                            RangeFunction *r);
  61 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
  62                                                 RangeTblEntry **top_rte, int *top_rti,
  63                                                 List **relnamespace,
  64                                                 Relids *containedRels);
  65 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
  66                                    Var *l_colvar, Var *r_colvar);
  67 static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
  68                                         List **tlist, int clause);
  69 static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
  70                                         List *sortlist, List *targetlist,
  71                                         SortByDir sortby_dir, SortByNulls sortby_nulls,
  72                                         List *sortby_opname, bool resolveUnknown);
  73
  74
  75 /*
  76  * transformFromClause -
  77  *        Process the FROM clause and add items to the query's range table,
  78  *        joinlist, and namespaces.
  79  *
  80  * Note: we assume that pstate's p_rtable, p_joinlist, p_relnamespace, and
  81  * p_varnamespace lists were initialized to NIL when the pstate was created.
  82  * We will add onto any entries already present --- this is needed for rule
  83  * processing, as well as for UPDATE and DELETE.
  84  *
  85  * The range table may grow still further when we transform the expressions
  86  * in the query's quals and target list. (This is possible because in
  87  * POSTQUEL, we allowed references to relations not specified in the
  88  * from-clause.  PostgreSQL keeps this extension to standard SQL.)
  89  */
  90 void
  91 transformFromClause(ParseState *pstate, List *frmList)
  92 {
  93         ListCell   *fl;
  94
  95         /*
  96          * The grammar will have produced a list of RangeVars, RangeSubselects,
  97          * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding
  98          * entries to the rtable), check for duplicate refnames, and then add it
  99          * to the joinlist and namespaces.
 100          */
 101         foreach(fl, frmList)
 102         {
 103                 Node       *n = lfirst(fl);
 104                 RangeTblEntry *rte;
 105                 int                     rtindex;
 106                 List       *relnamespace;
 107                 Relids          containedRels;
 108
 109                 n = transformFromClauseItem(pstate, n,
 110                                                                         &rte,
 111                                                                         &rtindex,
 112                                                                         &relnamespace,
 113                                                                         &containedRels);
 114                 checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
 115                 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
 116                 pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
 117                                                                                          relnamespace);
 118                 pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
 119                 bms_free(containedRels);
 120         }
 121 }
 122
 123 /*
 124  * setTargetTable
 125  *        Add the target relation of INSERT/UPDATE/DELETE to the range table,
 126  *        and make the special links to it in the ParseState.
 127  *
 128  *        We also open the target relation and acquire a write lock on it.
 129  *        This must be done before processing the FROM list, in case the target
 130  *        is also mentioned as a source relation --- we want to be sure to grab
 131  *        the write lock before any read lock.
 132  *
 133  *        If alsoSource is true, add the target to the query's joinlist and
 134  *        namespace.  For INSERT, we don't want the target to be joined to;
 135  *        it's a destination of tuples, not a source.   For UPDATE/DELETE,
 136  *        we do need to scan or join the target.  (NOTE: we do not bother
 137  *        to check for namespace conflict; we assume that the namespace was
 138  *        initially empty in these cases.)
 139  *
 140  *        Finally, we mark the relation as requiring the permissions specified
 141  *        by requiredPerms.
 142  *
 143  *        Returns the rangetable index of the target relation.
 144  */
 145 int
 146 setTargetTable(ParseState *pstate, RangeVar *relation,
 147                            bool inh, bool alsoSource, AclMode requiredPerms)
 148 {
 149         RangeTblEntry *rte;
 150         int                     rtindex;
 151
 152         /* Close old target; this could only happen for multi-action rules */
 153         if (pstate->p_target_relation != NULL)
 154                 heap_close(pstate->p_target_relation, NoLock);
 155
 156         /*
 157          * Open target rel and grab suitable lock (which we will hold till end of
 158          * transaction).
 159          *
 160          * free_parsestate() will eventually do the corresponding heap_close(),
 161          * but *not* release the lock.
 162          */
 163         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
 164
 165         /*
 166          * Now build an RTE.
 167          */
 168         rte = addRangeTableEntryForRelation(pstate, pstate->p_target_relation,
 169                                                                                 relation->alias, inh, false);
 170         pstate->p_target_rangetblentry = rte;
 171
 172         /* assume new rte is at end */
 173         rtindex = list_length(pstate->p_rtable);
 174         Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 175
 176         /*
 177          * Override addRangeTableEntry's default ACL_SELECT permissions check, and
 178          * instead mark target table as requiring exactly the specified
 179          * permissions.
 180          *
 181          * If we find an explicit reference to the rel later during parse
 182          * analysis, we will add the ACL_SELECT bit back again; see
 183          * scanRTEForColumn (for simple field references), ExpandColumnRefStar
 184          * (for foo.*) and ExpandAllTables (for *).
 185          */
 186         rte->requiredPerms = requiredPerms;
 187
 188         /*
 189          * If UPDATE/DELETE, add table to joinlist and namespaces.
 190          */
 191         if (alsoSource)
 192                 addRTEtoQuery(pstate, rte, true, true, true);
 193
 194         return rtindex;
 195 }
 196
 197 /*
 198  * Simplify InhOption (yes/no/default) into boolean yes/no.
 199  *
 200  * The reason we do things this way is that we don't want to examine the
 201  * SQL_inheritance option flag until parse_analyze() is run.    Otherwise,
 202  * we'd do the wrong thing with query strings that intermix SET commands
 203  * with queries.
 204  */
 205 bool
 206 interpretInhOption(InhOption inhOpt)
 207 {
 208         switch (inhOpt)
 209         {
 210                 case INH_NO:
 211                         return false;
 212                 case INH_YES:
 213                         return true;
 214                 case INH_DEFAULT:
 215                         return SQL_inheritance;
 216         }
 217         elog(ERROR, "bogus InhOption value: %d", inhOpt);
 218         return false;                           /* keep compiler quiet */
 219 }
 220
 221 /*
 222  * Given a relation-options list (of DefElems), return true iff the specified
 223  * table/result set should be created with OIDs. This needs to be done after
 224  * parsing the query string because the return value can depend upon the
 225  * default_with_oids GUC var.
 226  */
 227 bool
 228 interpretOidsOption(List *defList)
 229 {
 230         ListCell   *cell;
 231
 232         /* Scan list to see if OIDS was included */
 233         foreach(cell, defList)
 234         {
 235                 DefElem    *def = (DefElem *) lfirst(cell);
 236
 237                 if (pg_strcasecmp(def->defname, "oids") == 0)
 238                         return defGetBoolean(def);
 239         }
 240
 241         /* OIDS option was not specified, so use default. */
 242         return default_with_oids;
 243 }
 244
 245 /*
 246  * Extract all not-in-common columns from column lists of a source table
 247  */
 248 static void
 249 extractRemainingColumns(List *common_colnames,
 250                                                 List *src_colnames, List *src_colvars,
 251                                                 List **res_colnames, List **res_colvars)
 252 {
 253         List       *new_colnames = NIL;
 254         List       *new_colvars = NIL;
 255         ListCell   *lnames,
 256                            *lvars;
 257
 258         Assert(list_length(src_colnames) == list_length(src_colvars));
 259
 260         forboth(lnames, src_colnames, lvars, src_colvars)
 261         {
 262                 char       *colname = strVal(lfirst(lnames));
 263                 bool            match = false;
 264                 ListCell   *cnames;
 265
 266                 foreach(cnames, common_colnames)
 267                 {
 268                         char       *ccolname = strVal(lfirst(cnames));
 269
 270                         if (strcmp(colname, ccolname) == 0)
 271                         {
 272                                 match = true;
 273                                 break;
 274                         }
 275                 }
 276
 277                 if (!match)
 278                 {
 279                         new_colnames = lappend(new_colnames, lfirst(lnames));
 280                         new_colvars = lappend(new_colvars, lfirst(lvars));
 281                 }
 282         }
 283
 284         *res_colnames = new_colnames;
 285         *res_colvars = new_colvars;
 286 }
 287
 288 /* transformJoinUsingClause()
 289  *        Build a complete ON clause from a partially-transformed USING list.
 290  *        We are given lists of nodes representing left and right match columns.
 291  *        Result is a transformed qualification expression.
 292  */
 293 static Node *
 294 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
 295 {
 296         Node       *result = NULL;
 297         ListCell   *lvars,
 298                            *rvars;
 299
 300         /*
 301          * We cheat a little bit here by building an untransformed operator tree
 302          * whose leaves are the already-transformed Vars.  This is OK because
 303          * transformExpr() won't complain about already-transformed subnodes.
 304          */
 305         forboth(lvars, leftVars, rvars, rightVars)
 306         {
 307                 Node       *lvar = (Node *) lfirst(lvars);
 308                 Node       *rvar = (Node *) lfirst(rvars);
 309                 A_Expr     *e;
 310
 311                 e = makeSimpleA_Expr(AEXPR_OP, "=",
 312                                                          copyObject(lvar), copyObject(rvar),
 313                                                          -1);
 314
 315                 if (result == NULL)
 316                         result = (Node *) e;
 317                 else
 318                 {
 319                         A_Expr     *a;
 320
 321                         a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e, -1);
 322                         result = (Node *) a;
 323                 }
 324         }
 325
 326         /*
 327          * Since the references are already Vars, and are certainly from the input
 328          * relations, we don't have to go through the same pushups that
 329          * transformJoinOnClause() does.  Just invoke transformExpr() to fix up
 330          * the operators, and we're done.
 331          */
 332         result = transformExpr(pstate, result);
 333
 334         result = coerce_to_boolean(pstate, result, "JOIN/USING");
 335
 336         return result;
 337 }
 338
 339 /* transformJoinOnClause()
 340  *        Transform the qual conditions for JOIN/ON.
 341  *        Result is a transformed qualification expression.
 342  */
 343 static Node *
 344 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
 345                                           RangeTblEntry *l_rte,
 346                                           RangeTblEntry *r_rte,
 347                                           List *relnamespace,
 348                                           Relids containedRels)
 349 {
 350         Node       *result;
 351         List       *save_relnamespace;
 352         List       *save_varnamespace;
 353         Relids          clause_varnos;
 354         int                     varno;
 355
 356         /*
 357          * This is a tad tricky, for two reasons.  First, the namespace that the
 358          * join expression should see is just the two subtrees of the JOIN plus
 359          * any outer references from upper pstate levels.  So, temporarily set
 360          * this pstate's namespace accordingly.  (We need not check for refname
 361          * conflicts, because transformFromClauseItem() already did.) NOTE: this
 362          * code is OK only because the ON clause can't legally alter the namespace
 363          * by causing implicit relation refs to be added.
 364          */
 365         save_relnamespace = pstate->p_relnamespace;
 366         save_varnamespace = pstate->p_varnamespace;
 367
 368         pstate->p_relnamespace = relnamespace;
 369         pstate->p_varnamespace = list_make2(l_rte, r_rte);
 370
 371         result = transformWhereClause(pstate, j->quals, "JOIN/ON");
 372
 373         pstate->p_relnamespace = save_relnamespace;
 374         pstate->p_varnamespace = save_varnamespace;
 375
 376         /*
 377          * Second, we need to check that the ON condition doesn't refer to any
 378          * rels outside the input subtrees of the JOIN.  It could do that despite
 379          * our hack on the namespace if it uses fully-qualified names. So, grovel
 380          * through the transformed clause and make sure there are no bogus
 381          * references.  (Outer references are OK, and are ignored here.)
 382          */
 383         clause_varnos = pull_varnos(result);
 384         clause_varnos = bms_del_members(clause_varnos, containedRels);
 385         if ((varno = bms_first_member(clause_varnos)) >= 0)
 386         {
 387                 ereport(ERROR,
 388                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 389                  errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
 390                                 rt_fetch(varno, pstate->p_rtable)->eref->aliasname)));
 391         }
 392         bms_free(clause_varnos);
 393
 394         return result;
 395 }
 396
 397 /*
 398  * transformTableEntry --- transform a RangeVar (simple relation reference)
 399  */
 400 static RangeTblEntry *
 401 transformTableEntry(ParseState *pstate, RangeVar *r)
 402 {
 403         RangeTblEntry *rte;
 404
 405         /*
 406          * mark this entry to indicate it comes from the FROM clause. In SQL, the
 407          * target list can only refer to range variables specified in the from
 408          * clause but we follow the more powerful POSTQUEL semantics and
 409          * automatically generate the range variable if not specified. However
 410          * there are times we need to know whether the entries are legitimate.
 411          */
 412         rte = addRangeTableEntry(pstate, r, r->alias,
 413                                                          interpretInhOption(r->inhOpt), true);
 414
 415         return rte;
 416 }
 417
 418
 419 /*
 420  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
 421  */
 422 static RangeTblEntry *
 423 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
 424 {
 425         Query      *query;
 426         RangeTblEntry *rte;
 427
 428         /*
 429          * We require user to supply an alias for a subselect, per SQL92. To relax
 430          * this, we'd have to be prepared to gin up a unique alias for an
 431          * unlabeled subselect.
 432          */
 433         if (r->alias == NULL)
 434                 ereport(ERROR,
 435                                 (errcode(ERRCODE_SYNTAX_ERROR),
 436                                  errmsg("subquery in FROM must have an alias")));
 437
 438         /*
 439          * Analyze and transform the subquery.
 440          */
 441         query = parse_sub_analyze(r->subquery, pstate);
 442
 443         /*
 444          * Check that we got something reasonable.      Many of these conditions are
 445          * impossible given restrictions of the grammar, but check 'em anyway.
 446          */
 447         if (query->commandType != CMD_SELECT ||
 448                 query->utilityStmt != NULL)
 449                 elog(ERROR, "expected SELECT query from subquery in FROM");
 450         if (query->intoClause != NULL)
 451                 ereport(ERROR,
 452                                 (errcode(ERRCODE_SYNTAX_ERROR),
 453                                  errmsg("subquery in FROM cannot have SELECT INTO")));
 454
 455         /*
 456          * The subquery cannot make use of any variables from FROM items created
 457          * earlier in the current query.  Per SQL92, the scope of a FROM item does
 458          * not include other FROM items.  Formerly we hacked the namespace so that
 459          * the other variables weren't even visible, but it seems more useful to
 460          * leave them visible and give a specific error message.
 461          *
 462          * XXX this will need further work to support SQL99's LATERAL() feature,
 463          * wherein such references would indeed be legal.
 464          *
 465          * We can skip groveling through the subquery if there's not anything
 466          * visible in the current query.  Also note that outer references are OK.
 467          */
 468         if (pstate->p_relnamespace || pstate->p_varnamespace)
 469         {
 470                 if (contain_vars_of_level((Node *) query, 1))
 471                         ereport(ERROR,
 472                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 473                                          errmsg("subquery in FROM cannot refer to other relations of same query level")));
 474         }
 475
 476         /*
 477          * OK, build an RTE for the subquery.
 478          */
 479         rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
 480
 481         return rte;
 482 }
 483
 484
 485 /*
 486  * transformRangeFunction --- transform a function call appearing in FROM
 487  */
 488 static RangeTblEntry *
 489 transformRangeFunction(ParseState *pstate, RangeFunction *r)
 490 {
 491         Node       *funcexpr;
 492         char       *funcname;
 493         RangeTblEntry *rte;
 494
 495         /*
 496          * Get function name for possible use as alias.  We use the same
 497          * transformation rules as for a SELECT output expression.      For a FuncCall
 498          * node, the result will be the function name, but it is possible for the
 499          * grammar to hand back other node types.
 500          */
 501         funcname = FigureColname(r->funccallnode);
 502
 503         /*
 504          * Transform the raw expression.
 505          */
 506         funcexpr = transformExpr(pstate, r->funccallnode);
 507
 508         /*
 509          * The function parameters cannot make use of any variables from other
 510          * FROM items.  (Compare to transformRangeSubselect(); the coding is
 511          * different though because we didn't parse as a sub-select with its own
 512          * level of namespace.)
 513          *
 514          * XXX this will need further work to support SQL99's LATERAL() feature,
 515          * wherein such references would indeed be legal.
 516          */
 517         if (pstate->p_relnamespace || pstate->p_varnamespace)
 518         {
 519                 if (contain_vars_of_level(funcexpr, 0))
 520                         ereport(ERROR,
 521                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 522                                          errmsg("function expression in FROM cannot refer to other relations of same query level")));
 523         }
 524
 525         /*
 526          * Disallow aggregate functions in the expression.      (No reason to postpone
 527          * this check until parseCheckAggregates.)
 528          */
 529         if (pstate->p_hasAggs)
 530         {
 531                 if (checkExprHasAggs(funcexpr))
 532                         ereport(ERROR,
 533                                         (errcode(ERRCODE_GROUPING_ERROR),
 534                                          errmsg("cannot use aggregate function in function expression in FROM")));
 535         }
 536
 537         /*
 538          * OK, build an RTE for the function.
 539          */
 540         rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
 541                                                                                 r, true);
 542
 543         /*
 544          * If a coldeflist was supplied, ensure it defines a legal set of names
 545          * (no duplicates) and datatypes (no pseudo-types, for instance).
 546          * addRangeTableEntryForFunction looked up the type names but didn't check
 547          * them further than that.
 548          */
 549         if (r->coldeflist)
 550         {
 551                 TupleDesc       tupdesc;
 552
 553                 tupdesc = BuildDescFromLists(rte->eref->colnames,
 554                                                                          rte->funccoltypes,
 555                                                                          rte->funccoltypmods);
 556                 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
 557         }
 558
 559         return rte;
 560 }
 561
 562
 563 /*
 564  * transformFromClauseItem -
 565  *        Transform a FROM-clause item, adding any required entries to the
 566  *        range table list being built in the ParseState, and return the
 567  *        transformed item ready to include in the joinlist and namespaces.
 568  *        This routine can recurse to handle SQL92 JOIN expressions.
 569  *
 570  * The function return value is the node to add to the jointree (a
 571  * RangeTblRef or JoinExpr).  Additional output parameters are:
 572  *
 573  * *top_rte: receives the RTE corresponding to the jointree item.
 574  * (We could extract this from the function return node, but it saves cycles
 575  * to pass it back separately.)
 576  *
 577  * *top_rti: receives the rangetable index of top_rte.  (Ditto.)
 578  *
 579  * *relnamespace: receives a List of the RTEs exposed as relation names
 580  * by this item.
 581  *
 582  * *containedRels: receives a bitmap set of the rangetable indexes
 583  * of all the base and join relations represented in this jointree item.
 584  * This is needed for checking JOIN/ON conditions in higher levels.
 585  *
 586  * We do not need to pass back an explicit varnamespace value, because
 587  * in all cases the varnamespace contribution is exactly top_rte.
 588  */
 589 static Node *
 590 transformFromClauseItem(ParseState *pstate, Node *n,
 591                                                 RangeTblEntry **top_rte, int *top_rti,
 592                                                 List **relnamespace,
 593                                                 Relids *containedRels)
 594 {
 595         if (IsA(n, RangeVar))
 596         {
 597                 /* Plain relation reference */
 598                 RangeTblRef *rtr;
 599                 RangeTblEntry *rte;
 600                 int                     rtindex;
 601
 602                 rte = transformTableEntry(pstate, (RangeVar *) n);
 603                 /* assume new rte is at end */
 604                 rtindex = list_length(pstate->p_rtable);
 605                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 606                 *top_rte = rte;
 607                 *top_rti = rtindex;
 608                 *relnamespace = list_make1(rte);
 609                 *containedRels = bms_make_singleton(rtindex);
 610                 rtr = makeNode(RangeTblRef);
 611                 rtr->rtindex = rtindex;
 612                 return (Node *) rtr;
 613         }
 614         else if (IsA(n, RangeSubselect))
 615         {
 616                 /* sub-SELECT is like a plain relation */
 617                 RangeTblRef *rtr;
 618                 RangeTblEntry *rte;
 619                 int                     rtindex;
 620
 621                 rte = transformRangeSubselect(pstate, (RangeSubselect *) n);
 622                 /* assume new rte is at end */
 623                 rtindex = list_length(pstate->p_rtable);
 624                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 625                 *top_rte = rte;
 626                 *top_rti = rtindex;
 627                 *relnamespace = list_make1(rte);
 628                 *containedRels = bms_make_singleton(rtindex);
 629                 rtr = makeNode(RangeTblRef);
 630                 rtr->rtindex = rtindex;
 631                 return (Node *) rtr;
 632         }
 633         else if (IsA(n, RangeFunction))
 634         {
 635                 /* function is like a plain relation */
 636                 RangeTblRef *rtr;
 637                 RangeTblEntry *rte;
 638                 int                     rtindex;
 639
 640                 rte = transformRangeFunction(pstate, (RangeFunction *) n);
 641                 /* assume new rte is at end */
 642                 rtindex = list_length(pstate->p_rtable);
 643                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 644                 *top_rte = rte;
 645                 *top_rti = rtindex;
 646                 *relnamespace = list_make1(rte);
 647                 *containedRels = bms_make_singleton(rtindex);
 648                 rtr = makeNode(RangeTblRef);
 649                 rtr->rtindex = rtindex;
 650                 return (Node *) rtr;
 651         }
 652         else if (IsA(n, JoinExpr))
 653         {
 654                 /* A newfangled join expression */
 655                 JoinExpr   *j = (JoinExpr *) n;
 656                 RangeTblEntry *l_rte;
 657                 RangeTblEntry *r_rte;
 658                 int                     l_rtindex;
 659                 int                     r_rtindex;
 660                 Relids          l_containedRels,
 661                                         r_containedRels,
 662                                         my_containedRels;
 663                 List       *l_relnamespace,
 664                                    *r_relnamespace,
 665                                    *my_relnamespace,
 666                                    *l_colnames,
 667                                    *r_colnames,
 668                                    *res_colnames,
 669                                    *l_colvars,
 670                                    *r_colvars,
 671                                    *res_colvars;
 672                 RangeTblEntry *rte;
 673
 674                 /*
 675                  * Recursively process the left and right subtrees
 676                  */
 677                 j->larg = transformFromClauseItem(pstate, j->larg,
 678                                                                                   &l_rte,
 679                                                                                   &l_rtindex,
 680                                                                                   &l_relnamespace,
 681                                                                                   &l_containedRels);
 682                 j->rarg = transformFromClauseItem(pstate, j->rarg,
 683                                                                                   &r_rte,
 684                                                                                   &r_rtindex,
 685                                                                                   &r_relnamespace,
 686                                                                                   &r_containedRels);
 687
 688                 /*
 689                  * Check for conflicting refnames in left and right subtrees. Must do
 690                  * this because higher levels will assume I hand back a self-
 691                  * consistent namespace subtree.
 692                  */
 693                 checkNameSpaceConflicts(pstate, l_relnamespace, r_relnamespace);
 694
 695                 /*
 696                  * Generate combined relation membership info for possible use by
 697                  * transformJoinOnClause below.
 698                  */
 699                 my_relnamespace = list_concat(l_relnamespace, r_relnamespace);
 700                 my_containedRels = bms_join(l_containedRels, r_containedRels);
 701
 702                 pfree(r_relnamespace);  /* free unneeded list header */
 703
 704                 /*
 705                  * Extract column name and var lists from both subtrees
 706                  *
 707                  * Note: expandRTE returns new lists, safe for me to modify
 708                  */
 709                 expandRTE(l_rte, l_rtindex, 0, false,
 710                                   &l_colnames, &l_colvars);
 711                 expandRTE(r_rte, r_rtindex, 0, false,
 712                                   &r_colnames, &r_colvars);
 713
 714                 /*
 715                  * Natural join does not explicitly specify columns; must generate
 716                  * columns to join. Need to run through the list of columns from each
 717                  * table or join result and match up the column names. Use the first
 718                  * table, and check every column in the second table for a match.
 719                  * (We'll check that the matches were unique later on.) The result of
 720                  * this step is a list of column names just like an explicitly-written
 721                  * USING list.
 722                  */
 723                 if (j->isNatural)
 724                 {
 725                         List       *rlist = NIL;
 726                         ListCell   *lx,
 727                                            *rx;
 728
 729                         Assert(j->using == NIL);        /* shouldn't have USING() too */
 730
 731                         foreach(lx, l_colnames)
 732                         {
 733                                 char       *l_colname = strVal(lfirst(lx));
 734                                 Value      *m_name = NULL;
 735
 736                                 foreach(rx, r_colnames)
 737                                 {
 738                                         char       *r_colname = strVal(lfirst(rx));
 739
 740                                         if (strcmp(l_colname, r_colname) == 0)
 741                                         {
 742                                                 m_name = makeString(l_colname);
 743                                                 break;
 744                                         }
 745                                 }
 746
 747                                 /* matched a right column? then keep as join column... */
 748                                 if (m_name != NULL)
 749                                         rlist = lappend(rlist, m_name);
 750                         }
 751
 752                         j->using = rlist;
 753                 }
 754
 755                 /*
 756                  * Now transform the join qualifications, if any.
 757                  */
 758                 res_colnames = NIL;
 759                 res_colvars = NIL;
 760
 761                 if (j->using)
 762                 {
 763                         /*
 764                          * JOIN/USING (or NATURAL JOIN, as transformed above). Transform
 765                          * the list into an explicit ON-condition, and generate a list of
 766                          * merged result columns.
 767                          */
 768                         List       *ucols = j->using;
 769                         List       *l_usingvars = NIL;
 770                         List       *r_usingvars = NIL;
 771                         ListCell   *ucol;
 772
 773                         Assert(j->quals == NULL);       /* shouldn't have ON() too */
 774
 775                         foreach(ucol, ucols)
 776                         {
 777                                 char       *u_colname = strVal(lfirst(ucol));
 778                                 ListCell   *col;
 779                                 int                     ndx;
 780                                 int                     l_index = -1;
 781                                 int                     r_index = -1;
 782                                 Var                *l_colvar,
 783                                                    *r_colvar;
 784
 785                                 /* Check for USING(foo,foo) */
 786                                 foreach(col, res_colnames)
 787                                 {
 788                                         char       *res_colname = strVal(lfirst(col));
 789
 790                                         if (strcmp(res_colname, u_colname) == 0)
 791                                                 ereport(ERROR,
 792                                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
 793                                                                  errmsg("column name \"%s\" appears more than once in USING clause",
 794                                                                                 u_colname)));
 795                                 }
 796
 797                                 /* Find it in left input */
 798                                 ndx = 0;
 799                                 foreach(col, l_colnames)
 800                                 {
 801                                         char       *l_colname = strVal(lfirst(col));
 802
 803                                         if (strcmp(l_colname, u_colname) == 0)
 804                                         {
 805                                                 if (l_index >= 0)
 806                                                         ereport(ERROR,
 807                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 808                                                                          errmsg("common column name \"%s\" appears more than once in left table",
 809                                                                                         u_colname)));
 810                                                 l_index = ndx;
 811                                         }
 812                                         ndx++;
 813                                 }
 814                                 if (l_index < 0)
 815                                         ereport(ERROR,
 816                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 817                                                          errmsg("column \"%s\" specified in USING clause does not exist in left table",
 818                                                                         u_colname)));
 819
 820                                 /* Find it in right input */
 821                                 ndx = 0;
 822                                 foreach(col, r_colnames)
 823                                 {
 824                                         char       *r_colname = strVal(lfirst(col));
 825
 826                                         if (strcmp(r_colname, u_colname) == 0)
 827                                         {
 828                                                 if (r_index >= 0)
 829                                                         ereport(ERROR,
 830                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 831                                                                          errmsg("common column name \"%s\" appears more than once in right table",
 832                                                                                         u_colname)));
 833                                                 r_index = ndx;
 834                                         }
 835                                         ndx++;
 836                                 }
 837                                 if (r_index < 0)
 838                                         ereport(ERROR,
 839                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 840                                                          errmsg("column \"%s\" specified in USING clause does not exist in right table",
 841                                                                         u_colname)));
 842
 843                                 l_colvar = list_nth(l_colvars, l_index);
 844                                 l_usingvars = lappend(l_usingvars, l_colvar);
 845                                 r_colvar = list_nth(r_colvars, r_index);
 846                                 r_usingvars = lappend(r_usingvars, r_colvar);
 847
 848                                 res_colnames = lappend(res_colnames, lfirst(ucol));
 849                                 res_colvars = lappend(res_colvars,
 850                                                                           buildMergedJoinVar(pstate,
 851                                                                                                                  j->jointype,
 852                                                                                                                  l_colvar,
 853                                                                                                                  r_colvar));
 854                         }
 855
 856                         j->quals = transformJoinUsingClause(pstate,
 857                                                                                                 l_usingvars,
 858                                                                                                 r_usingvars);
 859                 }
 860                 else if (j->quals)
 861                 {
 862                         /* User-written ON-condition; transform it */
 863                         j->quals = transformJoinOnClause(pstate, j,
 864                                                                                          l_rte, r_rte,
 865                                                                                          my_relnamespace,
 866                                                                                          my_containedRels);
 867                 }
 868                 else
 869                 {
 870                         /* CROSS JOIN: no quals */
 871                 }
 872
 873                 /* Add remaining columns from each side to the output columns */
 874                 extractRemainingColumns(res_colnames,
 875                                                                 l_colnames, l_colvars,
 876                                                                 &l_colnames, &l_colvars);
 877                 extractRemainingColumns(res_colnames,
 878                                                                 r_colnames, r_colvars,
 879                                                                 &r_colnames, &r_colvars);
 880                 res_colnames = list_concat(res_colnames, l_colnames);
 881                 res_colvars = list_concat(res_colvars, l_colvars);
 882                 res_colnames = list_concat(res_colnames, r_colnames);
 883                 res_colvars = list_concat(res_colvars, r_colvars);
 884
 885                 /*
 886                  * Check alias (AS clause), if any.
 887                  */
 888                 if (j->alias)
 889                 {
 890                         if (j->alias->colnames != NIL)
 891                         {
 892                                 if (list_length(j->alias->colnames) > list_length(res_colnames))
 893                                         ereport(ERROR,
 894                                                         (errcode(ERRCODE_SYNTAX_ERROR),
 895                                                          errmsg("column alias list for \"%s\" has too many entries",
 896                                                                         j->alias->aliasname)));
 897                         }
 898                 }
 899
 900                 /*
 901                  * Now build an RTE for the result of the join
 902                  */
 903                 rte = addRangeTableEntryForJoin(pstate,
 904                                                                                 res_colnames,
 905                                                                                 j->jointype,
 906                                                                                 res_colvars,
 907                                                                                 j->alias,
 908                                                                                 true);
 909
 910                 /* assume new rte is at end */
 911                 j->rtindex = list_length(pstate->p_rtable);
 912                 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
 913
 914                 *top_rte = rte;
 915                 *top_rti = j->rtindex;
 916
 917                 /*
 918                  * Prepare returned namespace list.  If the JOIN has an alias then it
 919                  * hides the contained RTEs as far as the relnamespace goes;
 920                  * otherwise, put the contained RTEs and *not* the JOIN into
 921                  * relnamespace.
 922                  */
 923                 if (j->alias)
 924                 {
 925                         *relnamespace = list_make1(rte);
 926                         list_free(my_relnamespace);
 927                 }
 928                 else
 929                         *relnamespace = my_relnamespace;
 930
 931                 /*
 932                  * Include join RTE in returned containedRels set
 933                  */
 934                 *containedRels = bms_add_member(my_containedRels, j->rtindex);
 935
 936                 return (Node *) j;
 937         }
 938         else
 939                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
 940         return NULL;                            /* can't get here, keep compiler quiet */
 941 }
 942
 943 /*
 944  * buildMergedJoinVar -
 945  *        generate a suitable replacement expression for a merged join column
 946  */
 947 static Node *
 948 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 949                                    Var *l_colvar, Var *r_colvar)
 950 {
 951         Oid                     outcoltype;
 952         int32           outcoltypmod;
 953         Node       *l_node,
 954                            *r_node,
 955                            *res_node;
 956
 957         /*
 958          * Choose output type if input types are dissimilar.
 959          */
 960         outcoltype = l_colvar->vartype;
 961         outcoltypmod = l_colvar->vartypmod;
 962         if (outcoltype != r_colvar->vartype)
 963         {
 964                 outcoltype = select_common_type(list_make2_oid(l_colvar->vartype,
 965                                                                                                            r_colvar->vartype),
 966                                                                                 "JOIN/USING");
 967                 outcoltypmod = -1;              /* ie, unknown */
 968         }
 969         else if (outcoltypmod != r_colvar->vartypmod)
 970         {
 971                 /* same type, but not same typmod */
 972                 outcoltypmod = -1;              /* ie, unknown */
 973         }
 974
 975         /*
 976          * Insert coercion functions if needed.  Note that a difference in typmod
 977          * can only happen if input has typmod but outcoltypmod is -1. In that
 978          * case we insert a RelabelType to clearly mark that result's typmod is
 979          * not same as input.  We never need coerce_type_typmod.
 980          */
 981         if (l_colvar->vartype != outcoltype)
 982                 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
 983                                                          outcoltype, outcoltypmod,
 984                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 985         else if (l_colvar->vartypmod != outcoltypmod)
 986                 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
 987                                                                                   outcoltype, outcoltypmod,
 988                                                                                   COERCE_IMPLICIT_CAST);
 989         else
 990                 l_node = (Node *) l_colvar;
 991
 992         if (r_colvar->vartype != outcoltype)
 993                 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
 994                                                          outcoltype, outcoltypmod,
 995                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 996         else if (r_colvar->vartypmod != outcoltypmod)
 997                 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
 998                                                                                   outcoltype, outcoltypmod,
 999                                                                                   COERCE_IMPLICIT_CAST);
1000         else
1001                 r_node = (Node *) r_colvar;
1002
1003         /*
1004          * Choose what to emit
1005          */
1006         switch (jointype)
1007         {
1008                 case JOIN_INNER:
1009
1010                         /*
1011                          * We can use either var; prefer non-coerced one if available.
1012                          */
1013                         if (IsA(l_node, Var))
1014                                 res_node = l_node;
1015                         else if (IsA(r_node, Var))
1016                                 res_node = r_node;
1017                         else
1018                                 res_node = l_node;
1019                         break;
1020                 case JOIN_LEFT:
1021                         /* Always use left var */
1022                         res_node = l_node;
1023                         break;
1024                 case JOIN_RIGHT:
1025                         /* Always use right var */
1026                         res_node = r_node;
1027                         break;
1028                 case JOIN_FULL:
1029                         {
1030                                 /*
1031                                  * Here we must build a COALESCE expression to ensure that the
1032                                  * join output is non-null if either input is.
1033                                  */
1034                                 CoalesceExpr *c = makeNode(CoalesceExpr);
1035
1036                                 c->coalescetype = outcoltype;
1037                                 c->args = list_make2(l_node, r_node);
1038                                 res_node = (Node *) c;
1039                                 break;
1040                         }
1041                 default:
1042                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
1043                         res_node = NULL;        /* keep compiler quiet */
1044                         break;
1045         }
1046
1047         return res_node;
1048 }
1049
1050
1051 /*
1052  * transformWhereClause -
1053  *        Transform the qualification and make sure it is of type boolean.
1054  *        Used for WHERE and allied clauses.
1055  *
1056  * constructName does not affect the semantics, but is used in error messages
1057  */
1058 Node *
1059 transformWhereClause(ParseState *pstate, Node *clause,
1060                                          const char *constructName)
1061 {
1062         Node       *qual;
1063
1064         if (clause == NULL)
1065                 return NULL;
1066
1067         qual = transformExpr(pstate, clause);
1068
1069         qual = coerce_to_boolean(pstate, qual, constructName);
1070
1071         return qual;
1072 }
1073
1074
1075 /*
1076  * transformLimitClause -
1077  *        Transform the expression and make sure it is of type bigint.
1078  *        Used for LIMIT and allied clauses.
1079  *
1080  * Note: as of Postgres 8.2, LIMIT expressions are expected to yield int8,
1081  * rather than int4 as before.
1082  *
1083  * constructName does not affect the semantics, but is used in error messages
1084  */
1085 Node *
1086 transformLimitClause(ParseState *pstate, Node *clause,
1087                                          const char *constructName)
1088 {
1089         Node       *qual;
1090
1091         if (clause == NULL)
1092                 return NULL;
1093
1094         qual = transformExpr(pstate, clause);
1095
1096         qual = coerce_to_specific_type(pstate, qual, INT8OID, constructName);
1097
1098         /*
1099          * LIMIT can't refer to any vars or aggregates of the current query
1100          */
1101         if (contain_vars_of_level(qual, 0))
1102         {
1103                 ereport(ERROR,
1104                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1105                 /* translator: %s is name of a SQL construct, eg LIMIT */
1106                                  errmsg("argument of %s must not contain variables",
1107                                                 constructName)));
1108         }
1109         if (checkExprHasAggs(qual))
1110         {
1111                 ereport(ERROR,
1112                                 (errcode(ERRCODE_GROUPING_ERROR),
1113                 /* translator: %s is name of a SQL construct, eg LIMIT */
1114                                  errmsg("argument of %s must not contain aggregates",
1115                                                 constructName)));
1116         }
1117
1118         return qual;
1119 }
1120
1121
1122 /*
1123  *      findTargetlistEntry -
1124  *        Returns the targetlist entry matching the given (untransformed) node.
1125  *        If no matching entry exists, one is created and appended to the target
1126  *        list as a "resjunk" node.
1127  *
1128  * node         the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1129  * tlist        the target list (passed by reference so we can append to it)
1130  * clause       identifies clause type being processed
1131  */
1132 static TargetEntry *
1133 findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
1134 {
1135         TargetEntry *target_result = NULL;
1136         ListCell   *tl;
1137         Node       *expr;
1138
1139         /*----------
1140          * Handle two special cases as mandated by the SQL92 spec:
1141          *
1142          * 1. Bare ColumnName (no qualifier or subscripts)
1143          *        For a bare identifier, we search for a matching column name
1144          *        in the existing target list.  Multiple matches are an error
1145          *        unless they refer to identical values; for example,
1146          *        we allow      SELECT a, a FROM table ORDER BY a
1147          *        but not       SELECT a AS b, b FROM table ORDER BY b
1148          *        If no match is found, we fall through and treat the identifier
1149          *        as an expression.
1150          *        For GROUP BY, it is incorrect to match the grouping item against
1151          *        targetlist entries: according to SQL92, an identifier in GROUP BY
1152          *        is a reference to a column name exposed by FROM, not to a target
1153          *        list column.  However, many implementations (including pre-7.0
1154          *        PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1155          *        to see if the identifier matches any FROM column name, and only
1156          *        try for a targetlist name if it doesn't.  This ensures that we
1157          *        adhere to the spec in the case where the name could be both.
1158          *        DISTINCT ON isn't in the standard, so we can do what we like there;
1159          *        we choose to make it work like ORDER BY, on the rather flimsy
1160          *        grounds that ordinary DISTINCT works on targetlist entries.
1161          *
1162          * 2. IntegerConstant
1163          *        This means to use the n'th item in the existing target list.
1164          *        Note that it would make no sense to order/group/distinct by an
1165          *        actual constant, so this does not create a conflict with our
1166          *        extension to order/group by an expression.
1167          *        GROUP BY column-number is not allowed by SQL92, but since
1168          *        the standard has no other behavior defined for this syntax,
1169          *        we may as well accept this common extension.
1170          *
1171          * Note that pre-existing resjunk targets must not be used in either case,
1172          * since the user didn't write them in his SELECT list.
1173          *
1174          * If neither special case applies, fall through to treat the item as
1175          * an expression.
1176          *----------
1177          */
1178         if (IsA(node, ColumnRef) &&
1179                 list_length(((ColumnRef *) node)->fields) == 1)
1180         {
1181                 char       *name = strVal(linitial(((ColumnRef *) node)->fields));
1182                 int                     location = ((ColumnRef *) node)->location;
1183
1184                 if (clause == GROUP_CLAUSE)
1185                 {
1186                         /*
1187                          * In GROUP BY, we must prefer a match against a FROM-clause
1188                          * column to one against the targetlist.  Look to see if there is
1189                          * a matching column.  If so, fall through to let transformExpr()
1190                          * do the rest.  NOTE: if name could refer ambiguously to more
1191                          * than one column name exposed by FROM, colNameToVar will
1192                          * ereport(ERROR).      That's just what we want here.
1193                          *
1194                          * Small tweak for 7.4.3: ignore matches in upper query levels.
1195                          * This effectively changes the search order for bare names to (1)
1196                          * local FROM variables, (2) local targetlist aliases, (3) outer
1197                          * FROM variables, whereas before it was (1) (3) (2). SQL92 and
1198                          * SQL99 do not allow GROUPing BY an outer reference, so this
1199                          * breaks no cases that are legal per spec, and it seems a more
1200                          * self-consistent behavior.
1201                          */
1202                         if (colNameToVar(pstate, name, true, location) != NULL)
1203                                 name = NULL;
1204                 }
1205
1206                 if (name != NULL)
1207                 {
1208                         foreach(tl, *tlist)
1209                         {
1210                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1211
1212                                 if (!tle->resjunk &&
1213                                         strcmp(tle->resname, name) == 0)
1214                                 {
1215                                         if (target_result != NULL)
1216                                         {
1217                                                 if (!equal(target_result->expr, tle->expr))
1218                                                         ereport(ERROR,
1219                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1220
1221                                                         /*------
1222                                                           translator: first %s is name of a SQL construct, eg ORDER BY */
1223                                                                          errmsg("%s \"%s\" is ambiguous",
1224                                                                                         clauseText[clause], name),
1225                                                                          parser_errposition(pstate, location)));
1226                                         }
1227                                         else
1228                                                 target_result = tle;
1229                                         /* Stay in loop to check for ambiguity */
1230                                 }
1231                         }
1232                         if (target_result != NULL)
1233                                 return target_result;   /* return the first match */
1234                 }
1235         }
1236         if (IsA(node, A_Const))
1237         {
1238                 Value      *val = &((A_Const *) node)->val;
1239                 int                     targetlist_pos = 0;
1240                 int                     target_pos;
1241
1242                 if (!IsA(val, Integer))
1243                         ereport(ERROR,
1244                                         (errcode(ERRCODE_SYNTAX_ERROR),
1245                         /* translator: %s is name of a SQL construct, eg ORDER BY */
1246                                          errmsg("non-integer constant in %s",
1247                                                         clauseText[clause])));
1248                 target_pos = intVal(val);
1249                 foreach(tl, *tlist)
1250                 {
1251                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1252
1253                         if (!tle->resjunk)
1254                         {
1255                                 if (++targetlist_pos == target_pos)
1256                                         return tle; /* return the unique match */
1257                         }
1258                 }
1259                 ereport(ERROR,
1260                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1261                 /* translator: %s is name of a SQL construct, eg ORDER BY */
1262                                  errmsg("%s position %d is not in select list",
1263                                                 clauseText[clause], target_pos)));
1264         }
1265
1266         /*
1267          * Otherwise, we have an expression (this is a Postgres extension not
1268          * found in SQL92).  Convert the untransformed node to a transformed
1269          * expression, and search for a match in the tlist. NOTE: it doesn't
1270          * really matter whether there is more than one match.  Also, we are
1271          * willing to match a resjunk target here, though the above cases must
1272          * ignore resjunk targets.
1273          */
1274         expr = transformExpr(pstate, node);
1275
1276         foreach(tl, *tlist)
1277         {
1278                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1279
1280                 if (equal(expr, tle->expr))
1281                         return tle;
1282         }
1283
1284         /*
1285          * If no matches, construct a new target entry which is appended to the
1286          * end of the target list.      This target is given resjunk = TRUE so that it
1287          * will not be projected into the final tuple.
1288          */
1289         target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1290
1291         *tlist = lappend(*tlist, target_result);
1292
1293         return target_result;
1294 }
1295
1296 /*
1297  * transformGroupClause -
1298  *        transform a GROUP BY clause
1299  *
1300  * GROUP BY items will be added to the targetlist (as resjunk columns)
1301  * if not already present, so the targetlist must be passed by reference.
1302  */
1303 List *
1304 transformGroupClause(ParseState *pstate, List *grouplist,
1305                                          List **targetlist, List *sortClause)
1306 {
1307         List       *result = NIL;
1308         ListCell   *gl;
1309
1310         foreach(gl, grouplist)
1311         {
1312                 Node       *gexpr = (Node *) lfirst(gl);
1313                 TargetEntry *tle;
1314                 bool            found = false;
1315
1316                 tle = findTargetlistEntry(pstate, gexpr,
1317                                                                   targetlist, GROUP_CLAUSE);
1318
1319                 /* Eliminate duplicates (GROUP BY x, x) */
1320                 if (targetIsInSortList(tle, InvalidOid, result))
1321                         continue;
1322
1323                 /*
1324                  * If the GROUP BY tlist entry also appears in ORDER BY, copy operator
1325                  * info from the (first) matching ORDER BY item.  This means that if
1326                  * you write something like "GROUP BY foo ORDER BY foo USING <<<", the
1327                  * GROUP BY operation silently takes on the equality semantics implied
1328                  * by the ORDER BY.  There are two reasons to do this: it improves
1329                  * the odds that we can implement both GROUP BY and ORDER BY with a
1330                  * single sort step, and it allows the user to choose the equality
1331                  * semantics used by GROUP BY, should she be working with a datatype
1332                  * that has more than one equality operator.
1333                  */
1334                 if (tle->ressortgroupref > 0)
1335                 {
1336                         ListCell   *sl;
1337
1338                         foreach(sl, sortClause)
1339                         {
1340                                 SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
1341
1342                                 if (sc->tleSortGroupRef == tle->ressortgroupref)
1343                                 {
1344                                         result = lappend(result, copyObject(sc));
1345                                         found = true;
1346                                         break;
1347                                 }
1348                         }
1349                 }
1350
1351                 /*
1352                  * If no match in ORDER BY, just add it to the result using
1353                  * default sort/group semantics.
1354                  *
1355                  * XXX for now, the planner requires groupClause to be sortable,
1356                  * so we have to insist on that here.
1357                  */
1358                 if (!found)
1359                         result = addTargetToGroupList(pstate, tle,
1360                                                                                   result, *targetlist,
1361                                                                                   true, /* XXX for now */
1362                                                                                   true);
1363         }
1364
1365         return result;
1366 }
1367
1368 /*
1369  * transformSortClause -
1370  *        transform an ORDER BY clause
1371  *
1372  * ORDER BY items will be added to the targetlist (as resjunk columns)
1373  * if not already present, so the targetlist must be passed by reference.
1374  */
1375 List *
1376 transformSortClause(ParseState *pstate,
1377                                         List *orderlist,
1378                                         List **targetlist,
1379                                         bool resolveUnknown)
1380 {
1381         List       *sortlist = NIL;
1382         ListCell   *olitem;
1383
1384         foreach(olitem, orderlist)
1385         {
1386                 SortBy     *sortby = (SortBy *) lfirst(olitem);
1387                 TargetEntry *tle;
1388
1389                 tle = findTargetlistEntry(pstate, sortby->node,
1390                                                                   targetlist, ORDER_CLAUSE);
1391
1392                 sortlist = addTargetToSortList(pstate, tle,
1393                                                                            sortlist, *targetlist,
1394                                                                            sortby->sortby_dir,
1395                                                                            sortby->sortby_nulls,
1396                                                                            sortby->useOp,
1397                                                                            resolveUnknown);
1398         }
1399
1400         return sortlist;
1401 }
1402
1403 /*
1404  * transformDistinctClause -
1405  *        transform a DISTINCT clause
1406  *
1407  * Since we may need to add items to the query's targetlist, that list
1408  * is passed by reference.
1409  *
1410  * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
1411  * possible into the distinctClause.  This avoids a possible need to re-sort,
1412  * and allows the user to choose the equality semantics used by DISTINCT,
1413  * should she be working with a datatype that has more than one equality
1414  * operator.
1415  */
1416 List *
1417 transformDistinctClause(ParseState *pstate,
1418                                                 List **targetlist, List *sortClause)
1419 {
1420         List       *result = NIL;
1421         ListCell   *slitem;
1422         ListCell   *tlitem;
1423
1424         /*
1425          * The distinctClause should consist of all ORDER BY items followed
1426          * by all other non-resjunk targetlist items.  There must not be any
1427          * resjunk ORDER BY items --- that would imply that we are sorting
1428          * by a value that isn't necessarily unique within a DISTINCT group,
1429          * so the results wouldn't be well-defined.  This construction
1430          * ensures we follow the rule that sortClause and distinctClause match;
1431          * in fact the sortClause will always be a prefix of distinctClause.
1432          *
1433          * Note a corner case: the same TLE could be in the ORDER BY list
1434          * multiple times with different sortops.  We have to include it in
1435          * the distinctClause the same way to preserve the prefix property.
1436          * The net effect will be that the TLE value will be made unique
1437          * according to both sortops.
1438          */
1439         foreach(slitem, sortClause)
1440         {
1441                 SortGroupClause *scl = (SortGroupClause *) lfirst(slitem);
1442                 TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
1443
1444                 if (tle->resjunk)
1445                         ereport(ERROR,
1446                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1447                                          errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
1448                 result = lappend(result, copyObject(scl));
1449         }
1450
1451         /*
1452          * Now add any remaining non-resjunk tlist items, using default
1453          * sort/group semantics for their data types.
1454          *
1455          * XXX for now, the planner requires distinctClause to be sortable,
1456          * so we have to insist on that here.
1457          */
1458         foreach(tlitem, *targetlist)
1459         {
1460                 TargetEntry *tle = (TargetEntry *) lfirst(tlitem);
1461
1462                 if (tle->resjunk)
1463                         continue;                       /* ignore junk */
1464                 result = addTargetToGroupList(pstate, tle,
1465                                                                           result, *targetlist,
1466                                                                           true, /* XXX for now */
1467                                                                           true);
1468         }
1469
1470         return result;
1471 }
1472
1473 /*
1474  * transformDistinctOnClause -
1475  *        transform a DISTINCT ON clause
1476  *
1477  * Since we may need to add items to the query's targetlist, that list
1478  * is passed by reference.
1479  *
1480  * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
1481  * possible into the distinctClause.  This avoids a possible need to re-sort,
1482  * and allows the user to choose the equality semantics used by DISTINCT,
1483  * should she be working with a datatype that has more than one equality
1484  * operator.
1485  */
1486 List *
1487 transformDistinctOnClause(ParseState *pstate, List *distinctlist,
1488                                                   List **targetlist, List *sortClause)
1489 {
1490         List       *result = NIL;
1491         ListCell   *slitem;
1492         ListCell   *dlitem;
1493         Bitmapset  *refnos = NULL;
1494         int                     sortgroupref;
1495         bool            skipped_sortitem;
1496
1497         /*
1498          * Add all the DISTINCT ON expressions to the tlist (if not already
1499          * present, they are added as resjunk items).  Assign sortgroupref
1500          * numbers to them, and form a bitmapset of these numbers.  (A
1501          * bitmapset is convenient here because we don't care about order
1502          * and we can discard duplicates.)
1503          */
1504         foreach(dlitem, distinctlist)
1505         {
1506                 Node       *dexpr = (Node *) lfirst(dlitem);
1507                 TargetEntry *tle;
1508
1509                 tle = findTargetlistEntry(pstate, dexpr,
1510                                                                   targetlist, DISTINCT_ON_CLAUSE);
1511                 sortgroupref = assignSortGroupRef(tle, *targetlist);
1512                 refnos = bms_add_member(refnos, sortgroupref);
1513         }
1514
1515         /*
1516          * If the user writes both DISTINCT ON and ORDER BY, adopt the
1517          * sorting semantics from ORDER BY items that match DISTINCT ON
1518          * items, and also adopt their column sort order.  We insist that
1519          * the distinctClause and sortClause match, so throw error if we
1520          * find the need to add any more distinctClause items after we've
1521          * skipped an ORDER BY item that wasn't in DISTINCT ON.
1522          */
1523         skipped_sortitem = false;
1524         foreach(slitem, sortClause)
1525         {
1526                 SortGroupClause *scl = (SortGroupClause *) lfirst(slitem);
1527
1528                 if (bms_is_member(scl->tleSortGroupRef, refnos))
1529                 {
1530                         if (skipped_sortitem)
1531                                 ereport(ERROR,
1532                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1533                                                  errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1534                         else
1535                                 result = lappend(result, copyObject(scl));
1536                 }
1537                 else
1538                         skipped_sortitem = true;
1539         }
1540
1541         /*
1542          * Now add any remaining DISTINCT ON items, using default sort/group
1543          * semantics for their data types.  (Note: this is pretty questionable;
1544          * if the ORDER BY list doesn't include all the DISTINCT ON items and more
1545          * besides, you certainly aren't using DISTINCT ON in the intended way,
1546          * and you probably aren't going to get consistent results.  It might be
1547          * better to throw an error or warning here.  But historically we've
1548          * allowed it, so keep doing so.)
1549          */
1550         while ((sortgroupref = bms_first_member(refnos)) >= 0)
1551         {
1552                 TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist);
1553
1554                 if (targetIsInSortList(tle, InvalidOid, result))
1555                         continue;                       /* already in list (with some semantics) */
1556                 if (skipped_sortitem)
1557                         ereport(ERROR,
1558                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1559                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1560                 result = addTargetToGroupList(pstate, tle,
1561                                                                           result, *targetlist,
1562                                                                           true, /* someday allow hash-only? */
1563                                                                           true);
1564         }
1565
1566         return result;
1567 }
1568
1569 /*
1570  * addTargetToSortList
1571  *              If the given targetlist entry isn't already in the SortGroupClause
1572  *              list, add it to the end of the list, using the given sort ordering
1573  *              info.
1574  *
1575  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1576  * do nothing (which implies the search for a sort operator will fail).
1577  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1578  * otherwise.
1579  *
1580  * Returns the updated SortGroupClause list.
1581  */
1582 static List *
1583 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1584                                         List *sortlist, List *targetlist,
1585                                         SortByDir sortby_dir, SortByNulls sortby_nulls,
1586                                         List *sortby_opname, bool resolveUnknown)
1587 {
1588         Oid                     restype = exprType((Node *) tle->expr);
1589         Oid                     sortop;
1590         Oid                     eqop;
1591         bool            reverse;
1592
1593         /* if tlist item is an UNKNOWN literal, change it to TEXT */
1594         if (restype == UNKNOWNOID && resolveUnknown)
1595         {
1596                 tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1597                                                                                  restype, TEXTOID, -1,
1598                                                                                  COERCION_IMPLICIT,
1599                                                                                  COERCE_IMPLICIT_CAST);
1600                 restype = TEXTOID;
1601         }
1602
1603         /* determine the sortop, eqop, and directionality */
1604         switch (sortby_dir)
1605         {
1606                 case SORTBY_DEFAULT:
1607                 case SORTBY_ASC:
1608                         get_sort_group_operators(restype,
1609                                                                          true, true, false,
1610                                                                          &sortop, &eqop, NULL);
1611                         reverse = false;
1612                         break;
1613                 case SORTBY_DESC:
1614                         get_sort_group_operators(restype,
1615                                                                          false, true, true,
1616                                                                          NULL, &eqop, &sortop);
1617                         reverse = true;
1618                         break;
1619                 case SORTBY_USING:
1620                         Assert(sortby_opname != NIL);
1621                         sortop = compatible_oper_opid(sortby_opname,
1622                                                                                   restype,
1623                                                                                   restype,
1624                                                                                   false);
1625
1626                         /*
1627                          * Verify it's a valid ordering operator, fetch the corresponding
1628                          * equality operator, and determine whether to consider it like
1629                          * ASC or DESC.
1630                          */
1631                         eqop = get_equality_op_for_ordering_op(sortop, &reverse);
1632                         if (!OidIsValid(eqop))
1633                                 ereport(ERROR,
1634                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1635                                            errmsg("operator %s is not a valid ordering operator",
1636                                                           strVal(llast(sortby_opname))),
1637                                                  errhint("Ordering operators must be \"<\" or \">\" members of btree operator families.")));
1638                         break;
1639                 default:
1640                         elog(ERROR, "unrecognized sortby_dir: %d", sortby_dir);
1641                         sortop = InvalidOid;    /* keep compiler quiet */
1642                         eqop = InvalidOid;
1643                         reverse = false;
1644                         break;
1645         }
1646
1647         /* avoid making duplicate sortlist entries */
1648         if (!targetIsInSortList(tle, sortop, sortlist))
1649         {
1650                 SortGroupClause *sortcl = makeNode(SortGroupClause);
1651
1652                 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1653
1654                 sortcl->eqop = eqop;
1655                 sortcl->sortop = sortop;
1656
1657                 switch (sortby_nulls)
1658                 {
1659                         case SORTBY_NULLS_DEFAULT:
1660                                 /* NULLS FIRST is default for DESC; other way for ASC */
1661                                 sortcl->nulls_first = reverse;
1662                                 break;
1663                         case SORTBY_NULLS_FIRST:
1664                                 sortcl->nulls_first = true;
1665                                 break;
1666                         case SORTBY_NULLS_LAST:
1667                                 sortcl->nulls_first = false;
1668                                 break;
1669                         default:
1670                                 elog(ERROR, "unrecognized sortby_nulls: %d", sortby_nulls);
1671                                 break;
1672                 }
1673
1674                 sortlist = lappend(sortlist, sortcl);
1675         }
1676
1677         return sortlist;
1678 }
1679
1680 /*
1681  * addTargetToGroupList
1682  *              If the given targetlist entry isn't already in the SortGroupClause
1683  *              list, add it to the end of the list, using default sort/group
1684  *              semantics.
1685  *
1686  * This is very similar to addTargetToSortList, except that we allow the
1687  * case where only a grouping (equality) operator can be found, and that
1688  * the TLE is considered "already in the list" if it appears there with any
1689  * sorting semantics.
1690  *
1691  * If requireSortOp is TRUE, we require a sorting operator to be found too.
1692  * XXX this argument should eventually be obsolete, but for now there are
1693  * parts of the system that can't support non-sortable grouping lists.
1694  *
1695  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1696  * do nothing (which implies the search for an equality operator will fail).
1697  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1698  * otherwise.
1699  *
1700  * Returns the updated SortGroupClause list.
1701  */
1702 List *
1703 addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
1704                                          List *grouplist, List *targetlist,
1705                                          bool requireSortOp, bool resolveUnknown)
1706 {
1707         Oid                     restype = exprType((Node *) tle->expr);
1708         Oid                     sortop;
1709         Oid                     eqop;
1710
1711         /* if tlist item is an UNKNOWN literal, change it to TEXT */
1712         if (restype == UNKNOWNOID && resolveUnknown)
1713         {
1714                 tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1715                                                                                  restype, TEXTOID, -1,
1716                                                                                  COERCION_IMPLICIT,
1717                                                                                  COERCE_IMPLICIT_CAST);
1718                 restype = TEXTOID;
1719         }
1720
1721         /* avoid making duplicate grouplist entries */
1722         if (!targetIsInSortList(tle, InvalidOid, grouplist))
1723         {
1724                 SortGroupClause *grpcl = makeNode(SortGroupClause);
1725
1726                 /* determine the eqop and optional sortop */
1727                 get_sort_group_operators(restype,
1728                                                                  requireSortOp, true, false,
1729                                                                  &sortop, &eqop, NULL);
1730
1731                 grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1732                 grpcl->eqop = eqop;
1733                 grpcl->sortop = sortop;
1734                 grpcl->nulls_first = false;             /* OK with or without sortop */
1735
1736                 grouplist = lappend(grouplist, grpcl);
1737         }
1738
1739         return grouplist;
1740 }
1741
1742 /*
1743  * assignSortGroupRef
1744  *        Assign the targetentry an unused ressortgroupref, if it doesn't
1745  *        already have one.  Return the assigned or pre-existing refnumber.
1746  *
1747  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1748  */
1749 Index
1750 assignSortGroupRef(TargetEntry *tle, List *tlist)
1751 {
1752         Index           maxRef;
1753         ListCell   *l;
1754
1755         if (tle->ressortgroupref)       /* already has one? */
1756                 return tle->ressortgroupref;
1757
1758         /* easiest way to pick an unused refnumber: max used + 1 */
1759         maxRef = 0;
1760         foreach(l, tlist)
1761         {
1762                 Index           ref = ((TargetEntry *) lfirst(l))->ressortgroupref;
1763
1764                 if (ref > maxRef)
1765                         maxRef = ref;
1766         }
1767         tle->ressortgroupref = maxRef + 1;
1768         return tle->ressortgroupref;
1769 }
1770
1771 /*
1772  * targetIsInSortList
1773  *              Is the given target item already in the sortlist?
1774  *              If sortop is not InvalidOid, also test for a match to the sortop.
1775  *
1776  * It is not an oversight that this function ignores the nulls_first flag.
1777  * We check sortop when determining if an ORDER BY item is redundant with
1778  * earlier ORDER BY items, because it's conceivable that "ORDER BY
1779  * foo USING <, foo USING <<<" is not redundant, if <<< distinguishes
1780  * values that < considers equal.  We need not check nulls_first
1781  * however, because a lower-order column with the same sortop but
1782  * opposite nulls direction is redundant.  Also, we can consider
1783  * ORDER BY foo ASC, foo DESC redundant, so check for a commutator match.
1784  *
1785  * Works for both ordering and grouping lists (sortop would normally be
1786  * InvalidOid when considering grouping).  Note that the main reason we need
1787  * this routine (and not just a quick test for nonzeroness of ressortgroupref)
1788  * is that a TLE might be in only one of the lists.
1789  */
1790 bool
1791 targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList)
1792 {
1793         Index           ref = tle->ressortgroupref;
1794         ListCell   *l;
1795
1796         /* no need to scan list if tle has no marker */
1797         if (ref == 0)
1798                 return false;
1799
1800         foreach(l, sortList)
1801         {
1802                 SortGroupClause *scl = (SortGroupClause *) lfirst(l);
1803
1804                 if (scl->tleSortGroupRef == ref &&
1805                         (sortop == InvalidOid ||
1806                          sortop == scl->sortop ||
1807                          sortop == get_commutator(scl->sortop)))
1808                         return true;
1809         }
1810         return false;
1811 }