]> granicus.if.org Git - postgresql/blob - src/backend/catalog/index.c
Create an ALTER DEFAULT PRIVILEGES command, which allows users to adjust
[postgresql] / src / backend / catalog / index.c
1 /*-------------------------------------------------------------------------
2  *
3  * index.c
4  *        code to create and destroy POSTGRES index relations
5  *
6  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.322 2009/10/05 19:24:35 tgl Exp $
12  *
13  *
14  * INTERFACE ROUTINES
15  *              index_create()                  - Create a cataloged index relation
16  *              index_drop()                    - Removes index relation from catalogs
17  *              BuildIndexInfo()                - Prepare to insert index tuples
18  *              FormIndexDatum()                - Construct datum vector for one index tuple
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/xact.h"
32 #include "bootstrap/bootstrap.h"
33 #include "catalog/catalog.h"
34 #include "catalog/dependency.h"
35 #include "catalog/heap.h"
36 #include "catalog/index.h"
37 #include "catalog/indexing.h"
38 #include "catalog/namespace.h"
39 #include "catalog/pg_constraint.h"
40 #include "catalog/pg_operator.h"
41 #include "catalog/pg_opclass.h"
42 #include "catalog/pg_tablespace.h"
43 #include "catalog/pg_trigger.h"
44 #include "catalog/pg_type.h"
45 #include "catalog/storage.h"
46 #include "commands/tablecmds.h"
47 #include "commands/trigger.h"
48 #include "executor/executor.h"
49 #include "miscadmin.h"
50 #include "nodes/makefuncs.h"
51 #include "nodes/nodeFuncs.h"
52 #include "optimizer/clauses.h"
53 #include "optimizer/var.h"
54 #include "parser/parser.h"
55 #include "storage/bufmgr.h"
56 #include "storage/lmgr.h"
57 #include "storage/procarray.h"
58 #include "storage/smgr.h"
59 #include "utils/builtins.h"
60 #include "utils/fmgroids.h"
61 #include "utils/inval.h"
62 #include "utils/lsyscache.h"
63 #include "utils/memutils.h"
64 #include "utils/relcache.h"
65 #include "utils/syscache.h"
66 #include "utils/tuplesort.h"
67 #include "utils/snapmgr.h"
68 #include "utils/tqual.h"
69
70
71 /* state info for validate_index bulkdelete callback */
72 typedef struct
73 {
74         Tuplesortstate *tuplesort;      /* for sorting the index TIDs */
75         /* statistics (for debug purposes only): */
76         double          htups,
77                                 itups,
78                                 tups_inserted;
79 } v_i_state;
80
81 /* non-export function prototypes */
82 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
83                                                  IndexInfo *indexInfo,
84                                                  Oid accessMethodObjectId,
85                                                  Oid *classObjectId);
86 static void InitializeAttributeOids(Relation indexRelation,
87                                                 int numatts, Oid indexoid);
88 static void AppendAttributeTuples(Relation indexRelation, int numatts);
89 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
90                                         IndexInfo *indexInfo,
91                                         Oid *classOids,
92                                         int16 *coloptions,
93                                         bool primary,
94                                         bool immediate,
95                                         bool isvalid);
96 static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
97                                    Oid reltoastidxid, double reltuples);
98 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
99 static void validate_index_heapscan(Relation heapRelation,
100                                                 Relation indexRelation,
101                                                 IndexInfo *indexInfo,
102                                                 Snapshot snapshot,
103                                                 v_i_state *state);
104 static Oid      IndexGetRelation(Oid indexId);
105
106
107 /*
108  *              ConstructTupleDescriptor
109  *
110  * Build an index tuple descriptor for a new index
111  */
112 static TupleDesc
113 ConstructTupleDescriptor(Relation heapRelation,
114                                                  IndexInfo *indexInfo,
115                                                  Oid accessMethodObjectId,
116                                                  Oid *classObjectId)
117 {
118         int                     numatts = indexInfo->ii_NumIndexAttrs;
119         ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
120         HeapTuple       amtuple;
121         Form_pg_am      amform;
122         TupleDesc       heapTupDesc;
123         TupleDesc       indexTupDesc;
124         int                     natts;                  /* #atts in heap rel --- for error checks */
125         int                     i;
126
127         /* We need access to the index AM's pg_am tuple */
128         amtuple = SearchSysCache(AMOID,
129                                                          ObjectIdGetDatum(accessMethodObjectId),
130                                                          0, 0, 0);
131         if (!HeapTupleIsValid(amtuple))
132                 elog(ERROR, "cache lookup failed for access method %u",
133                          accessMethodObjectId);
134         amform = (Form_pg_am) GETSTRUCT(amtuple);
135
136         /* ... and to the table's tuple descriptor */
137         heapTupDesc = RelationGetDescr(heapRelation);
138         natts = RelationGetForm(heapRelation)->relnatts;
139
140         /*
141          * allocate the new tuple descriptor
142          */
143         indexTupDesc = CreateTemplateTupleDesc(numatts, false);
144
145         /*
146          * For simple index columns, we copy the pg_attribute row from the parent
147          * relation and modify it as necessary.  For expressions we have to cons
148          * up a pg_attribute row the hard way.
149          */
150         for (i = 0; i < numatts; i++)
151         {
152                 AttrNumber      atnum = indexInfo->ii_KeyAttrNumbers[i];
153                 Form_pg_attribute to = indexTupDesc->attrs[i];
154                 HeapTuple       tuple;
155                 Form_pg_type typeTup;
156                 Form_pg_opclass opclassTup;
157                 Oid                     keyType;
158
159                 if (atnum != 0)
160                 {
161                         /* Simple index column */
162                         Form_pg_attribute from;
163
164                         if (atnum < 0)
165                         {
166                                 /*
167                                  * here we are indexing on a system attribute (-1...-n)
168                                  */
169                                 from = SystemAttributeDefinition(atnum,
170                                                                                    heapRelation->rd_rel->relhasoids);
171                         }
172                         else
173                         {
174                                 /*
175                                  * here we are indexing on a normal attribute (1...n)
176                                  */
177                                 if (atnum > natts)              /* safety check */
178                                         elog(ERROR, "invalid column number %d", atnum);
179                                 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
180                         }
181
182                         /*
183                          * now that we've determined the "from", let's copy the tuple desc
184                          * data...
185                          */
186                         memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
187
188                         /*
189                          * Fix the stuff that should not be the same as the underlying
190                          * attr
191                          */
192                         to->attnum = i + 1;
193
194                         to->attstattarget = -1;
195                         to->attdistinct = 0;
196                         to->attcacheoff = -1;
197                         to->attnotnull = false;
198                         to->atthasdef = false;
199                         to->attislocal = true;
200                         to->attinhcount = 0;
201                 }
202                 else
203                 {
204                         /* Expressional index */
205                         Node       *indexkey;
206
207                         MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
208
209                         if (indexpr_item == NULL)       /* shouldn't happen */
210                                 elog(ERROR, "too few entries in indexprs list");
211                         indexkey = (Node *) lfirst(indexpr_item);
212                         indexpr_item = lnext(indexpr_item);
213
214                         /*
215                          * Make the attribute's name "pg_expresssion_nnn" (maybe think of
216                          * something better later)
217                          */
218                         sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
219
220                         /*
221                          * Lookup the expression type in pg_type for the type length etc.
222                          */
223                         keyType = exprType(indexkey);
224                         tuple = SearchSysCache(TYPEOID,
225                                                                    ObjectIdGetDatum(keyType),
226                                                                    0, 0, 0);
227                         if (!HeapTupleIsValid(tuple))
228                                 elog(ERROR, "cache lookup failed for type %u", keyType);
229                         typeTup = (Form_pg_type) GETSTRUCT(tuple);
230
231                         /*
232                          * Assign some of the attributes values. Leave the rest as 0.
233                          */
234                         to->attnum = i + 1;
235                         to->atttypid = keyType;
236                         to->attlen = typeTup->typlen;
237                         to->attbyval = typeTup->typbyval;
238                         to->attstorage = typeTup->typstorage;
239                         to->attalign = typeTup->typalign;
240                         to->attstattarget = -1;
241                         to->attcacheoff = -1;
242                         to->atttypmod = -1;
243                         to->attislocal = true;
244
245                         ReleaseSysCache(tuple);
246
247                         /*
248                          * Make sure the expression yields a type that's safe to store in
249                          * an index.  We need this defense because we have index opclasses
250                          * for pseudo-types such as "record", and the actually stored type
251                          * had better be safe; eg, a named composite type is okay, an
252                          * anonymous record type is not.  The test is the same as for
253                          * whether a table column is of a safe type (which is why we
254                          * needn't check for the non-expression case).
255                          */
256                         CheckAttributeType(NameStr(to->attname), to->atttypid);
257                 }
258
259                 /*
260                  * We do not yet have the correct relation OID for the index, so just
261                  * set it invalid for now.      InitializeAttributeOids() will fix it
262                  * later.
263                  */
264                 to->attrelid = InvalidOid;
265
266                 /*
267                  * Check the opclass and index AM to see if either provides a keytype
268                  * (overriding the attribute type).  Opclass takes precedence.
269                  */
270                 tuple = SearchSysCache(CLAOID,
271                                                            ObjectIdGetDatum(classObjectId[i]),
272                                                            0, 0, 0);
273                 if (!HeapTupleIsValid(tuple))
274                         elog(ERROR, "cache lookup failed for opclass %u",
275                                  classObjectId[i]);
276                 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
277                 if (OidIsValid(opclassTup->opckeytype))
278                         keyType = opclassTup->opckeytype;
279                 else
280                         keyType = amform->amkeytype;
281                 ReleaseSysCache(tuple);
282
283                 if (OidIsValid(keyType) && keyType != to->atttypid)
284                 {
285                         /* index value and heap value have different types */
286                         tuple = SearchSysCache(TYPEOID,
287                                                                    ObjectIdGetDatum(keyType),
288                                                                    0, 0, 0);
289                         if (!HeapTupleIsValid(tuple))
290                                 elog(ERROR, "cache lookup failed for type %u", keyType);
291                         typeTup = (Form_pg_type) GETSTRUCT(tuple);
292
293                         to->atttypid = keyType;
294                         to->atttypmod = -1;
295                         to->attlen = typeTup->typlen;
296                         to->attbyval = typeTup->typbyval;
297                         to->attalign = typeTup->typalign;
298                         to->attstorage = typeTup->typstorage;
299
300                         ReleaseSysCache(tuple);
301                 }
302         }
303
304         ReleaseSysCache(amtuple);
305
306         return indexTupDesc;
307 }
308
309 /* ----------------------------------------------------------------
310  *              InitializeAttributeOids
311  * ----------------------------------------------------------------
312  */
313 static void
314 InitializeAttributeOids(Relation indexRelation,
315                                                 int numatts,
316                                                 Oid indexoid)
317 {
318         TupleDesc       tupleDescriptor;
319         int                     i;
320
321         tupleDescriptor = RelationGetDescr(indexRelation);
322
323         for (i = 0; i < numatts; i += 1)
324                 tupleDescriptor->attrs[i]->attrelid = indexoid;
325 }
326
327 /* ----------------------------------------------------------------
328  *              AppendAttributeTuples
329  * ----------------------------------------------------------------
330  */
331 static void
332 AppendAttributeTuples(Relation indexRelation, int numatts)
333 {
334         Relation        pg_attribute;
335         CatalogIndexState indstate;
336         TupleDesc       indexTupDesc;
337         int                     i;
338
339         /*
340          * open the attribute relation and its indexes
341          */
342         pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
343
344         indstate = CatalogOpenIndexes(pg_attribute);
345
346         /*
347          * insert data from new index's tupdesc into pg_attribute
348          */
349         indexTupDesc = RelationGetDescr(indexRelation);
350
351         for (i = 0; i < numatts; i++)
352         {
353                 /*
354                  * There used to be very grotty code here to set these fields, but I
355                  * think it's unnecessary.  They should be set already.
356                  */
357                 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
358                 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
359
360                 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
361         }
362
363         CatalogCloseIndexes(indstate);
364
365         heap_close(pg_attribute, RowExclusiveLock);
366 }
367
368 /* ----------------------------------------------------------------
369  *              UpdateIndexRelation
370  *
371  * Construct and insert a new entry in the pg_index catalog
372  * ----------------------------------------------------------------
373  */
374 static void
375 UpdateIndexRelation(Oid indexoid,
376                                         Oid heapoid,
377                                         IndexInfo *indexInfo,
378                                         Oid *classOids,
379                                         int16 *coloptions,
380                                         bool primary,
381                                         bool immediate,
382                                         bool isvalid)
383 {
384         int2vector *indkey;
385         oidvector  *indclass;
386         int2vector *indoption;
387         Datum           exprsDatum;
388         Datum           predDatum;
389         Datum           values[Natts_pg_index];
390         bool            nulls[Natts_pg_index];
391         Relation        pg_index;
392         HeapTuple       tuple;
393         int                     i;
394
395         /*
396          * Copy the index key, opclass, and indoption info into arrays (should we
397          * make the caller pass them like this to start with?)
398          */
399         indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
400         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
401                 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
402         indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
403         indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
404
405         /*
406          * Convert the index expressions (if any) to a text datum
407          */
408         if (indexInfo->ii_Expressions != NIL)
409         {
410                 char       *exprsString;
411
412                 exprsString = nodeToString(indexInfo->ii_Expressions);
413                 exprsDatum = CStringGetTextDatum(exprsString);
414                 pfree(exprsString);
415         }
416         else
417                 exprsDatum = (Datum) 0;
418
419         /*
420          * Convert the index predicate (if any) to a text datum.  Note we convert
421          * implicit-AND format to normal explicit-AND for storage.
422          */
423         if (indexInfo->ii_Predicate != NIL)
424         {
425                 char       *predString;
426
427                 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
428                 predDatum = CStringGetTextDatum(predString);
429                 pfree(predString);
430         }
431         else
432                 predDatum = (Datum) 0;
433
434         /*
435          * open the system catalog index relation
436          */
437         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
438
439         /*
440          * Build a pg_index tuple
441          */
442         MemSet(nulls, false, sizeof(nulls));
443
444         values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
445         values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
446         values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
447         values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
448         values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
449         values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
450         values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
451         values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
452         values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
453         /* we set isvalid and isready the same way */
454         values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
455         values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
456         values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
457         values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
458         values[Anum_pg_index_indexprs - 1] = exprsDatum;
459         if (exprsDatum == (Datum) 0)
460                 nulls[Anum_pg_index_indexprs - 1] = true;
461         values[Anum_pg_index_indpred - 1] = predDatum;
462         if (predDatum == (Datum) 0)
463                 nulls[Anum_pg_index_indpred - 1] = true;
464
465         tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
466
467         /*
468          * insert the tuple into the pg_index catalog
469          */
470         simple_heap_insert(pg_index, tuple);
471
472         /* update the indexes on pg_index */
473         CatalogUpdateIndexes(pg_index, tuple);
474
475         /*
476          * close the relation and free the tuple
477          */
478         heap_close(pg_index, RowExclusiveLock);
479         heap_freetuple(tuple);
480 }
481
482
483 /*
484  * index_create
485  *
486  * heapRelationId: OID of table to build index on
487  * indexRelationName: name to give the new index relation
488  * indexRelationId: normally, pass InvalidOid to let this routine
489  *              generate an OID for the index.  During bootstrap this may be
490  *              nonzero to specify a preselected OID.
491  * indexInfo: same info executor uses to insert into the index
492  * accessMethodObjectId: OID of index AM to use
493  * tableSpaceId: OID of tablespace to use
494  * classObjectId: array of index opclass OIDs, one per index column
495  * coloptions: array of per-index-column indoption settings
496  * reloptions: AM-specific options
497  * isprimary: index is a PRIMARY KEY
498  * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
499  * deferrable: constraint is DEFERRABLE
500  * initdeferred: constraint is INITIALLY DEFERRED
501  * allow_system_table_mods: allow table to be a system catalog
502  * skip_build: true to skip the index_build() step for the moment; caller
503  *              must do it later (typically via reindex_index())
504  * concurrent: if true, do not lock the table against writers.  The index
505  *              will be marked "invalid" and the caller must take additional steps
506  *              to fix it up.
507  *
508  * Returns OID of the created index.
509  */
510 Oid
511 index_create(Oid heapRelationId,
512                          const char *indexRelationName,
513                          Oid indexRelationId,
514                          IndexInfo *indexInfo,
515                          Oid accessMethodObjectId,
516                          Oid tableSpaceId,
517                          Oid *classObjectId,
518                          int16 *coloptions,
519                          Datum reloptions,
520                          bool isprimary,
521                          bool isconstraint,
522                          bool deferrable,
523                          bool initdeferred,
524                          bool allow_system_table_mods,
525                          bool skip_build,
526                          bool concurrent)
527 {
528         Relation        pg_class;
529         Relation        heapRelation;
530         Relation        indexRelation;
531         TupleDesc       indexTupDesc;
532         bool            shared_relation;
533         Oid                     namespaceId;
534         int                     i;
535
536         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
537
538         /*
539          * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
540          * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
541          * (but not VACUUM).
542          */
543         heapRelation = heap_open(heapRelationId,
544                                                 (concurrent ? ShareUpdateExclusiveLock : ShareLock));
545
546         /*
547          * The index will be in the same namespace as its parent table, and is
548          * shared across databases if and only if the parent is.
549          */
550         namespaceId = RelationGetNamespace(heapRelation);
551         shared_relation = heapRelation->rd_rel->relisshared;
552
553         /*
554          * check parameters
555          */
556         if (indexInfo->ii_NumIndexAttrs < 1)
557                 elog(ERROR, "must index at least one column");
558
559         if (!allow_system_table_mods &&
560                 IsSystemRelation(heapRelation) &&
561                 IsNormalProcessingMode())
562                 ereport(ERROR,
563                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
564                                  errmsg("user-defined indexes on system catalog tables are not supported")));
565
566         /*
567          * concurrent index build on a system catalog is unsafe because we tend to
568          * release locks before committing in catalogs
569          */
570         if (concurrent &&
571                 IsSystemRelation(heapRelation))
572                 ereport(ERROR,
573                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
574                                  errmsg("concurrent index creation on system catalog tables is not supported")));
575
576         /*
577          * We cannot allow indexing a shared relation after initdb (because
578          * there's no way to make the entry in other databases' pg_class).
579          */
580         if (shared_relation && !IsBootstrapProcessingMode())
581                 ereport(ERROR,
582                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
583                                  errmsg("shared indexes cannot be created after initdb")));
584
585         /*
586          * Validate shared/non-shared tablespace (must check this before doing
587          * GetNewRelFileNode, to prevent Assert therein)
588          */
589         if (shared_relation)
590         {
591                 if (tableSpaceId != GLOBALTABLESPACE_OID)
592                         /* elog since this is not a user-facing error */
593                         elog(ERROR,
594                                  "shared relations must be placed in pg_global tablespace");
595         }
596         else
597         {
598                 if (tableSpaceId == GLOBALTABLESPACE_OID)
599                         ereport(ERROR,
600                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
601                                          errmsg("only shared relations can be placed in pg_global tablespace")));
602         }
603
604         if (get_relname_relid(indexRelationName, namespaceId))
605                 ereport(ERROR,
606                                 (errcode(ERRCODE_DUPLICATE_TABLE),
607                                  errmsg("relation \"%s\" already exists",
608                                                 indexRelationName)));
609
610         /*
611          * construct tuple descriptor for index tuples
612          */
613         indexTupDesc = ConstructTupleDescriptor(heapRelation,
614                                                                                         indexInfo,
615                                                                                         accessMethodObjectId,
616                                                                                         classObjectId);
617
618         /*
619          * Allocate an OID for the index, unless we were told what to use.
620          *
621          * The OID will be the relfilenode as well, so make sure it doesn't
622          * collide with either pg_class OIDs or existing physical files.
623          */
624         if (!OidIsValid(indexRelationId))
625                 indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
626                                                                                         pg_class);
627
628         /*
629          * create the index relation's relcache entry and physical disk file. (If
630          * we fail further down, it's the smgr's responsibility to remove the disk
631          * file again.)
632          */
633         indexRelation = heap_create(indexRelationName,
634                                                                 namespaceId,
635                                                                 tableSpaceId,
636                                                                 indexRelationId,
637                                                                 indexTupDesc,
638                                                                 RELKIND_INDEX,
639                                                                 shared_relation,
640                                                                 allow_system_table_mods);
641
642         Assert(indexRelationId == RelationGetRelid(indexRelation));
643
644         /*
645          * Obtain exclusive lock on it.  Although no other backends can see it
646          * until we commit, this prevents deadlock-risk complaints from lock
647          * manager in cases such as CLUSTER.
648          */
649         LockRelation(indexRelation, AccessExclusiveLock);
650
651         /*
652          * Fill in fields of the index's pg_class entry that are not set correctly
653          * by heap_create.
654          *
655          * XXX should have a cleaner way to create cataloged indexes
656          */
657         indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
658         indexRelation->rd_rel->relam = accessMethodObjectId;
659         indexRelation->rd_rel->relkind = RELKIND_INDEX;
660         indexRelation->rd_rel->relhasoids = false;
661
662         /*
663          * store index's pg_class entry
664          */
665         InsertPgClassTuple(pg_class, indexRelation,
666                                            RelationGetRelid(indexRelation),
667                                            (Datum) 0,
668                                            reloptions);
669
670         /* done with pg_class */
671         heap_close(pg_class, RowExclusiveLock);
672
673         /*
674          * now update the object id's of all the attribute tuple forms in the
675          * index relation's tuple descriptor
676          */
677         InitializeAttributeOids(indexRelation,
678                                                         indexInfo->ii_NumIndexAttrs,
679                                                         indexRelationId);
680
681         /*
682          * append ATTRIBUTE tuples for the index
683          */
684         AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
685
686         /* ----------------
687          *        update pg_index
688          *        (append INDEX tuple)
689          *
690          *        Note that this stows away a representation of "predicate".
691          *        (Or, could define a rule to maintain the predicate) --Nels, Feb '92
692          * ----------------
693          */
694         UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
695                                                 classObjectId, coloptions, isprimary,
696                                                 !deferrable,
697                                                 !concurrent);
698
699         /*
700          * Register constraint and dependencies for the index.
701          *
702          * If the index is from a CONSTRAINT clause, construct a pg_constraint
703          * entry. The index is then linked to the constraint, which in turn is
704          * linked to the table.  If it's not a CONSTRAINT, make the dependency
705          * directly on the table.
706          *
707          * We don't need a dependency on the namespace, because there'll be an
708          * indirect dependency via our parent table.
709          *
710          * During bootstrap we can't register any dependencies, and we don't try
711          * to make a constraint either.
712          */
713         if (!IsBootstrapProcessingMode())
714         {
715                 ObjectAddress myself,
716                                         referenced;
717
718                 myself.classId = RelationRelationId;
719                 myself.objectId = indexRelationId;
720                 myself.objectSubId = 0;
721
722                 if (isconstraint)
723                 {
724                         char            constraintType;
725                         Oid                     conOid;
726
727                         if (isprimary)
728                                 constraintType = CONSTRAINT_PRIMARY;
729                         else if (indexInfo->ii_Unique)
730                                 constraintType = CONSTRAINT_UNIQUE;
731                         else
732                         {
733                                 elog(ERROR, "constraint must be PRIMARY or UNIQUE");
734                                 constraintType = 0;             /* keep compiler quiet */
735                         }
736
737                         /* Shouldn't have any expressions */
738                         if (indexInfo->ii_Expressions)
739                                 elog(ERROR, "constraints cannot have index expressions");
740
741                         conOid = CreateConstraintEntry(indexRelationName,
742                                                                                    namespaceId,
743                                                                                    constraintType,
744                                                                                    deferrable,
745                                                                                    initdeferred,
746                                                                                    heapRelationId,
747                                                                                    indexInfo->ii_KeyAttrNumbers,
748                                                                                    indexInfo->ii_NumIndexAttrs,
749                                                                                    InvalidOid,  /* no domain */
750                                                                                    indexRelationId,     /* index OID */
751                                                                                    InvalidOid,  /* no foreign key */
752                                                                                    NULL,
753                                                                                    NULL,
754                                                                                    NULL,
755                                                                                    NULL,
756                                                                                    0,
757                                                                                    ' ',
758                                                                                    ' ',
759                                                                                    ' ',
760                                                                                    NULL,                /* no check constraint */
761                                                                                    NULL,
762                                                                                    NULL,
763                                                                                    true,                /* islocal */
764                                                                                    0);  /* inhcount */
765
766                         referenced.classId = ConstraintRelationId;
767                         referenced.objectId = conOid;
768                         referenced.objectSubId = 0;
769
770                         recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
771
772                         /*
773                          * If the constraint is deferrable, create the deferred uniqueness
774                          * checking trigger.  (The trigger will be given an internal
775                          * dependency on the constraint by CreateTrigger, so there's no
776                          * need to do anything more here.)
777                          */
778                         if (deferrable)
779                         {
780                                 RangeVar   *heapRel;
781                                 CreateTrigStmt *trigger;
782
783                                 heapRel = makeRangeVar(get_namespace_name(namespaceId),
784                                                                            pstrdup(RelationGetRelationName(heapRelation)),
785                                                                            -1);
786
787                                 trigger = makeNode(CreateTrigStmt);
788                                 trigger->trigname = pstrdup(indexRelationName);
789                                 trigger->relation = heapRel;
790                                 trigger->funcname = SystemFuncName("unique_key_recheck");
791                                 trigger->args = NIL;
792                                 trigger->before = false;
793                                 trigger->row = true;
794                                 trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
795                                 trigger->isconstraint = true;
796                                 trigger->deferrable = true;
797                                 trigger->initdeferred = initdeferred;
798                                 trigger->constrrel = NULL;
799
800                                 (void) CreateTrigger(trigger, conOid, indexRelationId,
801                                                                          isprimary ? "PK_ConstraintTrigger" :
802                                                                          "Unique_ConstraintTrigger",
803                                                                          false);
804                         }
805                 }
806                 else
807                 {
808                         bool            have_simple_col = false;
809
810                         /* Create auto dependencies on simply-referenced columns */
811                         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
812                         {
813                                 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
814                                 {
815                                         referenced.classId = RelationRelationId;
816                                         referenced.objectId = heapRelationId;
817                                         referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
818
819                                         recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
820
821                                         have_simple_col = true;
822                                 }
823                         }
824
825                         /*
826                          * It's possible for an index to not depend on any columns of the
827                          * table at all, in which case we need to give it a dependency on
828                          * the table as a whole; else it won't get dropped when the table
829                          * is dropped.  This edge case is not totally useless; for
830                          * example, a unique index on a constant expression can serve to
831                          * prevent a table from containing more than one row.
832                          */
833                         if (!have_simple_col &&
834                          !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) &&
835                                 !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0))
836                         {
837                                 referenced.classId = RelationRelationId;
838                                 referenced.objectId = heapRelationId;
839                                 referenced.objectSubId = 0;
840
841                                 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
842                         }
843
844                         /* Non-constraint indexes can't be deferrable */
845                         Assert(!deferrable);
846                         Assert(!initdeferred);
847                 }
848
849                 /* Store dependency on operator classes */
850                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
851                 {
852                         referenced.classId = OperatorClassRelationId;
853                         referenced.objectId = classObjectId[i];
854                         referenced.objectSubId = 0;
855
856                         recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
857                 }
858
859                 /* Store dependencies on anything mentioned in index expressions */
860                 if (indexInfo->ii_Expressions)
861                 {
862                         recordDependencyOnSingleRelExpr(&myself,
863                                                                                   (Node *) indexInfo->ii_Expressions,
864                                                                                         heapRelationId,
865                                                                                         DEPENDENCY_NORMAL,
866                                                                                         DEPENDENCY_AUTO);
867                 }
868
869                 /* Store dependencies on anything mentioned in predicate */
870                 if (indexInfo->ii_Predicate)
871                 {
872                         recordDependencyOnSingleRelExpr(&myself,
873                                                                                         (Node *) indexInfo->ii_Predicate,
874                                                                                         heapRelationId,
875                                                                                         DEPENDENCY_NORMAL,
876                                                                                         DEPENDENCY_AUTO);
877                 }
878         }
879         else
880         {
881                 /* Bootstrap mode - assert we weren't asked for constraint support */
882                 Assert(!isconstraint);
883                 Assert(!deferrable);
884                 Assert(!initdeferred);
885         }
886
887         /*
888          * Advance the command counter so that we can see the newly-entered
889          * catalog tuples for the index.
890          */
891         CommandCounterIncrement();
892
893         /*
894          * In bootstrap mode, we have to fill in the index strategy structure with
895          * information from the catalogs.  If we aren't bootstrapping, then the
896          * relcache entry has already been rebuilt thanks to sinval update during
897          * CommandCounterIncrement.
898          */
899         if (IsBootstrapProcessingMode())
900                 RelationInitIndexAccessInfo(indexRelation);
901         else
902                 Assert(indexRelation->rd_indexcxt != NULL);
903
904         /*
905          * If this is bootstrap (initdb) time, then we don't actually fill in the
906          * index yet.  We'll be creating more indexes and classes later, so we
907          * delay filling them in until just before we're done with bootstrapping.
908          * Similarly, if the caller specified skip_build then filling the index is
909          * delayed till later (ALTER TABLE can save work in some cases with this).
910          * Otherwise, we call the AM routine that constructs the index.
911          */
912         if (IsBootstrapProcessingMode())
913         {
914                 index_register(heapRelationId, indexRelationId, indexInfo);
915         }
916         else if (skip_build)
917         {
918                 /*
919                  * Caller is responsible for filling the index later on.  However,
920                  * we'd better make sure that the heap relation is correctly marked as
921                  * having an index.
922                  */
923                 index_update_stats(heapRelation,
924                                                    true,
925                                                    isprimary,
926                                                    InvalidOid,
927                                                    heapRelation->rd_rel->reltuples);
928                 /* Make the above update visible */
929                 CommandCounterIncrement();
930         }
931         else
932         {
933                 index_build(heapRelation, indexRelation, indexInfo, isprimary);
934         }
935
936         /*
937          * Close the heap and index; but we keep the locks that we acquired above
938          * until end of transaction.
939          */
940         index_close(indexRelation, NoLock);
941         heap_close(heapRelation, NoLock);
942
943         return indexRelationId;
944 }
945
946 /*
947  *              index_drop
948  *
949  * NOTE: this routine should now only be called through performDeletion(),
950  * else associated dependencies won't be cleaned up.
951  */
952 void
953 index_drop(Oid indexId)
954 {
955         Oid                     heapId;
956         Relation        userHeapRelation;
957         Relation        userIndexRelation;
958         Relation        indexRelation;
959         HeapTuple       tuple;
960         bool            hasexprs;
961
962         /*
963          * To drop an index safely, we must grab exclusive lock on its parent
964          * table.  Exclusive lock on the index alone is insufficient because
965          * another backend might be about to execute a query on the parent table.
966          * If it relies on a previously cached list of index OIDs, then it could
967          * attempt to access the just-dropped index.  We must therefore take a
968          * table lock strong enough to prevent all queries on the table from
969          * proceeding until we commit and send out a shared-cache-inval notice
970          * that will make them update their index lists.
971          */
972         heapId = IndexGetRelation(indexId);
973         userHeapRelation = heap_open(heapId, AccessExclusiveLock);
974
975         userIndexRelation = index_open(indexId, AccessExclusiveLock);
976
977         /*
978          * Schedule physical removal of the files
979          */
980         RelationDropStorage(userIndexRelation);
981
982         /*
983          * Close and flush the index's relcache entry, to ensure relcache doesn't
984          * try to rebuild it while we're deleting catalog entries. We keep the
985          * lock though.
986          */
987         index_close(userIndexRelation, NoLock);
988
989         RelationForgetRelation(indexId);
990
991         /*
992          * fix INDEX relation, and check for expressional index
993          */
994         indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
995
996         tuple = SearchSysCache(INDEXRELID,
997                                                    ObjectIdGetDatum(indexId),
998                                                    0, 0, 0);
999         if (!HeapTupleIsValid(tuple))
1000                 elog(ERROR, "cache lookup failed for index %u", indexId);
1001
1002         hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
1003
1004         simple_heap_delete(indexRelation, &tuple->t_self);
1005
1006         ReleaseSysCache(tuple);
1007         heap_close(indexRelation, RowExclusiveLock);
1008
1009         /*
1010          * if it has any expression columns, we might have stored statistics about
1011          * them.
1012          */
1013         if (hasexprs)
1014                 RemoveStatistics(indexId, 0);
1015
1016         /*
1017          * fix ATTRIBUTE relation
1018          */
1019         DeleteAttributeTuples(indexId);
1020
1021         /*
1022          * fix RELATION relation
1023          */
1024         DeleteRelationTuple(indexId);
1025
1026         /*
1027          * We are presently too lazy to attempt to compute the new correct value
1028          * of relhasindex (the next VACUUM will fix it if necessary). So there is
1029          * no need to update the pg_class tuple for the owning relation. But we
1030          * must send out a shared-cache-inval notice on the owning relation to
1031          * ensure other backends update their relcache lists of indexes.
1032          */
1033         CacheInvalidateRelcache(userHeapRelation);
1034
1035         /*
1036          * Close owning rel, but keep lock
1037          */
1038         heap_close(userHeapRelation, NoLock);
1039 }
1040
1041 /* ----------------------------------------------------------------
1042  *                                              index_build support
1043  * ----------------------------------------------------------------
1044  */
1045
1046 /* ----------------
1047  *              BuildIndexInfo
1048  *                      Construct an IndexInfo record for an open index
1049  *
1050  * IndexInfo stores the information about the index that's needed by
1051  * FormIndexDatum, which is used for both index_build() and later insertion
1052  * of individual index tuples.  Normally we build an IndexInfo for an index
1053  * just once per command, and then use it for (potentially) many tuples.
1054  * ----------------
1055  */
1056 IndexInfo *
1057 BuildIndexInfo(Relation index)
1058 {
1059         IndexInfo  *ii = makeNode(IndexInfo);
1060         Form_pg_index indexStruct = index->rd_index;
1061         int                     i;
1062         int                     numKeys;
1063
1064         /* check the number of keys, and copy attr numbers into the IndexInfo */
1065         numKeys = indexStruct->indnatts;
1066         if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1067                 elog(ERROR, "invalid indnatts %d for index %u",
1068                          numKeys, RelationGetRelid(index));
1069         ii->ii_NumIndexAttrs = numKeys;
1070         for (i = 0; i < numKeys; i++)
1071                 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1072
1073         /* fetch any expressions needed for expressional indexes */
1074         ii->ii_Expressions = RelationGetIndexExpressions(index);
1075         ii->ii_ExpressionsState = NIL;
1076
1077         /* fetch index predicate if any */
1078         ii->ii_Predicate = RelationGetIndexPredicate(index);
1079         ii->ii_PredicateState = NIL;
1080
1081         /* other info */
1082         ii->ii_Unique = indexStruct->indisunique;
1083         ii->ii_ReadyForInserts = indexStruct->indisready;
1084
1085         /* initialize index-build state to default */
1086         ii->ii_Concurrent = false;
1087         ii->ii_BrokenHotChain = false;
1088
1089         return ii;
1090 }
1091
1092 /* ----------------
1093  *              FormIndexDatum
1094  *                      Construct values[] and isnull[] arrays for a new index tuple.
1095  *
1096  *      indexInfo               Info about the index
1097  *      slot                    Heap tuple for which we must prepare an index entry
1098  *      estate                  executor state for evaluating any index expressions
1099  *      values                  Array of index Datums (output area)
1100  *      isnull                  Array of is-null indicators (output area)
1101  *
1102  * When there are no index expressions, estate may be NULL.  Otherwise it
1103  * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1104  * context must point to the heap tuple passed in.
1105  *
1106  * Notice we don't actually call index_form_tuple() here; we just prepare
1107  * its input arrays values[] and isnull[].      This is because the index AM
1108  * may wish to alter the data before storage.
1109  * ----------------
1110  */
1111 void
1112 FormIndexDatum(IndexInfo *indexInfo,
1113                            TupleTableSlot *slot,
1114                            EState *estate,
1115                            Datum *values,
1116                            bool *isnull)
1117 {
1118         ListCell   *indexpr_item;
1119         int                     i;
1120
1121         if (indexInfo->ii_Expressions != NIL &&
1122                 indexInfo->ii_ExpressionsState == NIL)
1123         {
1124                 /* First time through, set up expression evaluation state */
1125                 indexInfo->ii_ExpressionsState = (List *)
1126                         ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1127                                                         estate);
1128                 /* Check caller has set up context correctly */
1129                 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1130         }
1131         indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1132
1133         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1134         {
1135                 int                     keycol = indexInfo->ii_KeyAttrNumbers[i];
1136                 Datum           iDatum;
1137                 bool            isNull;
1138
1139                 if (keycol != 0)
1140                 {
1141                         /*
1142                          * Plain index column; get the value we need directly from the
1143                          * heap tuple.
1144                          */
1145                         iDatum = slot_getattr(slot, keycol, &isNull);
1146                 }
1147                 else
1148                 {
1149                         /*
1150                          * Index expression --- need to evaluate it.
1151                          */
1152                         if (indexpr_item == NULL)
1153                                 elog(ERROR, "wrong number of index expressions");
1154                         iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1155                                                                                            GetPerTupleExprContext(estate),
1156                                                                                            &isNull,
1157                                                                                            NULL);
1158                         indexpr_item = lnext(indexpr_item);
1159                 }
1160                 values[i] = iDatum;
1161                 isnull[i] = isNull;
1162         }
1163
1164         if (indexpr_item != NULL)
1165                 elog(ERROR, "wrong number of index expressions");
1166 }
1167
1168
1169 /*
1170  * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1171  *
1172  * This routine updates the pg_class row of either an index or its parent
1173  * relation after CREATE INDEX or REINDEX.      Its rather bizarre API is designed
1174  * to ensure we can do all the necessary work in just one update.
1175  *
1176  * hasindex: set relhasindex to this value
1177  * isprimary: if true, set relhaspkey true; else no change
1178  * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
1179  *              else no change
1180  * reltuples: set reltuples to this value
1181  *
1182  * relpages is also updated (using RelationGetNumberOfBlocks()).
1183  *
1184  * NOTE: an important side-effect of this operation is that an SI invalidation
1185  * message is sent out to all backends --- including me --- causing relcache
1186  * entries to be flushed or updated with the new data.  This must happen even
1187  * if we find that no change is needed in the pg_class row.  When updating
1188  * a heap entry, this ensures that other backends find out about the new
1189  * index.  When updating an index, it's important because some index AMs
1190  * expect a relcache flush to occur after REINDEX.
1191  */
1192 static void
1193 index_update_stats(Relation rel, bool hasindex, bool isprimary,
1194                                    Oid reltoastidxid, double reltuples)
1195 {
1196         BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1197         Oid                     relid = RelationGetRelid(rel);
1198         Relation        pg_class;
1199         HeapTuple       tuple;
1200         Form_pg_class rd_rel;
1201         bool            dirty;
1202
1203         /*
1204          * We always update the pg_class row using a non-transactional,
1205          * overwrite-in-place update.  There are several reasons for this:
1206          *
1207          * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1208          *
1209          * 2. We could be reindexing pg_class itself, in which case we can't move
1210          * its pg_class row because CatalogUpdateIndexes might not know about all
1211          * the indexes yet (see reindex_relation).
1212          *
1213          * 3. Because we execute CREATE INDEX with just share lock on the parent
1214          * rel (to allow concurrent index creations), an ordinary update could
1215          * suffer a tuple-concurrently-updated failure against another CREATE
1216          * INDEX committing at about the same time.  We can avoid that by having
1217          * them both do nontransactional updates (we assume they will both be
1218          * trying to change the pg_class row to the same thing, so it doesn't
1219          * matter which goes first).
1220          *
1221          * 4. Even with just a single CREATE INDEX, there's a risk factor because
1222          * someone else might be trying to open the rel while we commit, and this
1223          * creates a race condition as to whether he will see both or neither of
1224          * the pg_class row versions as valid.  Again, a non-transactional update
1225          * avoids the risk.  It is indeterminate which state of the row the other
1226          * process will see, but it doesn't matter (if he's only taking
1227          * AccessShareLock, then it's not critical that he see relhasindex true).
1228          *
1229          * It is safe to use a non-transactional update even though our
1230          * transaction could still fail before committing.      Setting relhasindex
1231          * true is safe even if there are no indexes (VACUUM will eventually fix
1232          * it), and of course the relpages and reltuples counts are correct (or at
1233          * least more so than the old values) regardless.
1234          */
1235
1236         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1237
1238         /*
1239          * Make a copy of the tuple to update.  Normally we use the syscache, but
1240          * we can't rely on that during bootstrap or while reindexing pg_class
1241          * itself.
1242          */
1243         if (IsBootstrapProcessingMode() ||
1244                 ReindexIsProcessingHeap(RelationRelationId))
1245         {
1246                 /* don't assume syscache will work */
1247                 HeapScanDesc pg_class_scan;
1248                 ScanKeyData key[1];
1249
1250                 ScanKeyInit(&key[0],
1251                                         ObjectIdAttributeNumber,
1252                                         BTEqualStrategyNumber, F_OIDEQ,
1253                                         ObjectIdGetDatum(relid));
1254
1255                 pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
1256                 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1257                 tuple = heap_copytuple(tuple);
1258                 heap_endscan(pg_class_scan);
1259         }
1260         else
1261         {
1262                 /* normal case, use syscache */
1263                 tuple = SearchSysCacheCopy(RELOID,
1264                                                                    ObjectIdGetDatum(relid),
1265                                                                    0, 0, 0);
1266         }
1267
1268         if (!HeapTupleIsValid(tuple))
1269                 elog(ERROR, "could not find tuple for relation %u", relid);
1270         rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1271
1272         /* Apply required updates, if any, to copied tuple */
1273
1274         dirty = false;
1275         if (rd_rel->relhasindex != hasindex)
1276         {
1277                 rd_rel->relhasindex = hasindex;
1278                 dirty = true;
1279         }
1280         if (isprimary)
1281         {
1282                 if (!rd_rel->relhaspkey)
1283                 {
1284                         rd_rel->relhaspkey = true;
1285                         dirty = true;
1286                 }
1287         }
1288         if (OidIsValid(reltoastidxid))
1289         {
1290                 Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
1291                 if (rd_rel->reltoastidxid != reltoastidxid)
1292                 {
1293                         rd_rel->reltoastidxid = reltoastidxid;
1294                         dirty = true;
1295                 }
1296         }
1297         if (rd_rel->reltuples != (float4) reltuples)
1298         {
1299                 rd_rel->reltuples = (float4) reltuples;
1300                 dirty = true;
1301         }
1302         if (rd_rel->relpages != (int32) relpages)
1303         {
1304                 rd_rel->relpages = (int32) relpages;
1305                 dirty = true;
1306         }
1307
1308         /*
1309          * If anything changed, write out the tuple
1310          */
1311         if (dirty)
1312         {
1313                 heap_inplace_update(pg_class, tuple);
1314                 /* the above sends a cache inval message */
1315         }
1316         else
1317         {
1318                 /* no need to change tuple, but force relcache inval anyway */
1319                 CacheInvalidateRelcacheByTuple(tuple);
1320         }
1321
1322         heap_freetuple(tuple);
1323
1324         heap_close(pg_class, RowExclusiveLock);
1325 }
1326
1327 /*
1328  * setNewRelfilenode            - assign a new relfilenode value to the relation
1329  *
1330  * Caller must already hold exclusive lock on the relation.
1331  *
1332  * The relation is marked with relfrozenxid=freezeXid (InvalidTransactionId
1333  * must be passed for indexes)
1334  */
1335 void
1336 setNewRelfilenode(Relation relation, TransactionId freezeXid)
1337 {
1338         Oid                     newrelfilenode;
1339         RelFileNode newrnode;
1340         Relation        pg_class;
1341         HeapTuple       tuple;
1342         Form_pg_class rd_rel;
1343
1344         /* Can't change relfilenode for nailed tables (indexes ok though) */
1345         Assert(!relation->rd_isnailed ||
1346                    relation->rd_rel->relkind == RELKIND_INDEX);
1347         /* Can't change for shared tables or indexes */
1348         Assert(!relation->rd_rel->relisshared);
1349         /* Indexes must have Invalid frozenxid; other relations must not */
1350         Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
1351                         freezeXid == InvalidTransactionId) ||
1352                    TransactionIdIsNormal(freezeXid));
1353
1354         /* Allocate a new relfilenode */
1355         newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
1356                                                                            relation->rd_rel->relisshared,
1357                                                                            NULL);
1358
1359         /*
1360          * Find the pg_class tuple for the given relation.      This is not used
1361          * during bootstrap, so okay to use heap_update always.
1362          */
1363         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1364
1365         tuple = SearchSysCacheCopy(RELOID,
1366                                                            ObjectIdGetDatum(RelationGetRelid(relation)),
1367                                                            0, 0, 0);
1368         if (!HeapTupleIsValid(tuple))
1369                 elog(ERROR, "could not find tuple for relation %u",
1370                          RelationGetRelid(relation));
1371         rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1372
1373         /*
1374          * ... and create storage for corresponding forks in the new relfilenode.
1375          *
1376          * NOTE: any conflict in relfilenode value will be caught here
1377          */
1378         newrnode = relation->rd_node;
1379         newrnode.relNode = newrelfilenode;
1380
1381         /*
1382          * Create the main fork, like heap_create() does, and drop the old
1383          * storage.
1384          */
1385         RelationCreateStorage(newrnode, relation->rd_istemp);
1386         smgrclosenode(newrnode);
1387         RelationDropStorage(relation);
1388
1389         /* update the pg_class row */
1390         rd_rel->relfilenode = newrelfilenode;
1391         rd_rel->relpages = 0;           /* it's empty until further notice */
1392         rd_rel->reltuples = 0;
1393         rd_rel->relfrozenxid = freezeXid;
1394         simple_heap_update(pg_class, &tuple->t_self, tuple);
1395         CatalogUpdateIndexes(pg_class, tuple);
1396
1397         heap_freetuple(tuple);
1398
1399         heap_close(pg_class, RowExclusiveLock);
1400
1401         /* Make sure the relfilenode change is visible */
1402         CommandCounterIncrement();
1403
1404         /* Mark the rel as having a new relfilenode in current transaction */
1405         RelationCacheMarkNewRelfilenode(relation);
1406 }
1407
1408
1409 /*
1410  * index_build - invoke access-method-specific index build procedure
1411  *
1412  * On entry, the index's catalog entries are valid, and its physical disk
1413  * file has been created but is empty.  We call the AM-specific build
1414  * procedure to fill in the index contents.  We then update the pg_class
1415  * entries of the index and heap relation as needed, using statistics
1416  * returned by ambuild as well as data passed by the caller.
1417  *
1418  * Note: when reindexing an existing index, isprimary can be false;
1419  * the index is already properly marked and need not be re-marked.
1420  *
1421  * Note: before Postgres 8.2, the passed-in heap and index Relations
1422  * were automatically closed by this routine.  This is no longer the case.
1423  * The caller opened 'em, and the caller should close 'em.
1424  */
1425 void
1426 index_build(Relation heapRelation,
1427                         Relation indexRelation,
1428                         IndexInfo *indexInfo,
1429                         bool isprimary)
1430 {
1431         RegProcedure procedure;
1432         IndexBuildResult *stats;
1433         Oid                     save_userid;
1434         bool            save_secdefcxt;
1435
1436         /*
1437          * sanity checks
1438          */
1439         Assert(RelationIsValid(indexRelation));
1440         Assert(PointerIsValid(indexRelation->rd_am));
1441
1442         procedure = indexRelation->rd_am->ambuild;
1443         Assert(RegProcedureIsValid(procedure));
1444
1445         /*
1446          * Switch to the table owner's userid, so that any index functions are run
1447          * as that user.
1448          */
1449         GetUserIdAndContext(&save_userid, &save_secdefcxt);
1450         SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1451
1452         /*
1453          * Call the access method's build procedure
1454          */
1455         stats = (IndexBuildResult *)
1456                 DatumGetPointer(OidFunctionCall3(procedure,
1457                                                                                  PointerGetDatum(heapRelation),
1458                                                                                  PointerGetDatum(indexRelation),
1459                                                                                  PointerGetDatum(indexInfo)));
1460         Assert(PointerIsValid(stats));
1461
1462         /* Restore userid */
1463         SetUserIdAndContext(save_userid, save_secdefcxt);
1464
1465         /*
1466          * If we found any potentially broken HOT chains, mark the index as not
1467          * being usable until the current transaction is below the event horizon.
1468          * See src/backend/access/heap/README.HOT for discussion.
1469          */
1470         if (indexInfo->ii_BrokenHotChain)
1471         {
1472                 Oid                     indexId = RelationGetRelid(indexRelation);
1473                 Relation        pg_index;
1474                 HeapTuple       indexTuple;
1475                 Form_pg_index indexForm;
1476
1477                 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1478
1479                 indexTuple = SearchSysCacheCopy(INDEXRELID,
1480                                                                                 ObjectIdGetDatum(indexId),
1481                                                                                 0, 0, 0);
1482                 if (!HeapTupleIsValid(indexTuple))
1483                         elog(ERROR, "cache lookup failed for index %u", indexId);
1484                 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1485
1486                 indexForm->indcheckxmin = true;
1487                 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1488                 CatalogUpdateIndexes(pg_index, indexTuple);
1489
1490                 heap_freetuple(indexTuple);
1491                 heap_close(pg_index, RowExclusiveLock);
1492         }
1493
1494         /*
1495          * Update heap and index pg_class rows
1496          */
1497         index_update_stats(heapRelation,
1498                                            true,
1499                                            isprimary,
1500                                            (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
1501                                            RelationGetRelid(indexRelation) : InvalidOid,
1502                                            stats->heap_tuples);
1503
1504         index_update_stats(indexRelation,
1505                                            false,
1506                                            false,
1507                                            InvalidOid,
1508                                            stats->index_tuples);
1509
1510         /* Make the updated versions visible */
1511         CommandCounterIncrement();
1512 }
1513
1514
1515 /*
1516  * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
1517  *
1518  * This is called back from an access-method-specific index build procedure
1519  * after the AM has done whatever setup it needs.  The parent heap relation
1520  * is scanned to find tuples that should be entered into the index.  Each
1521  * such tuple is passed to the AM's callback routine, which does the right
1522  * things to add it to the new index.  After we return, the AM's index
1523  * build procedure does whatever cleanup is needed; in particular, it should
1524  * close the heap and index relations.
1525  *
1526  * The total count of heap tuples is returned.  This is for updating pg_class
1527  * statistics.  (It's annoying not to be able to do that here, but we can't
1528  * do it until after the relation is closed.)  Note that the index AM itself
1529  * must keep track of the number of index tuples; we don't do so here because
1530  * the AM might reject some of the tuples for its own reasons, such as being
1531  * unable to store NULLs.
1532  *
1533  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1534  * any potentially broken HOT chains.  Currently, we set this if there are
1535  * any RECENTLY_DEAD entries in a HOT chain, without trying very hard to
1536  * detect whether they're really incompatible with the chain tip.
1537  */
1538 double
1539 IndexBuildHeapScan(Relation heapRelation,
1540                                    Relation indexRelation,
1541                                    IndexInfo *indexInfo,
1542                                    bool allow_sync,
1543                                    IndexBuildCallback callback,
1544                                    void *callback_state)
1545 {
1546         HeapScanDesc scan;
1547         HeapTuple       heapTuple;
1548         Datum           values[INDEX_MAX_KEYS];
1549         bool            isnull[INDEX_MAX_KEYS];
1550         double          reltuples;
1551         List       *predicate;
1552         TupleTableSlot *slot;
1553         EState     *estate;
1554         ExprContext *econtext;
1555         Snapshot        snapshot;
1556         TransactionId OldestXmin;
1557         BlockNumber root_blkno = InvalidBlockNumber;
1558         OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1559
1560         /*
1561          * sanity checks
1562          */
1563         Assert(OidIsValid(indexRelation->rd_rel->relam));
1564
1565         /*
1566          * Need an EState for evaluation of index expressions and partial-index
1567          * predicates.  Also a slot to hold the current tuple.
1568          */
1569         estate = CreateExecutorState();
1570         econtext = GetPerTupleExprContext(estate);
1571         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1572
1573         /* Arrange for econtext's scan tuple to be the tuple under test */
1574         econtext->ecxt_scantuple = slot;
1575
1576         /* Set up execution state for predicate, if any. */
1577         predicate = (List *)
1578                 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1579                                                 estate);
1580
1581         /*
1582          * Prepare for scan of the base relation.  In a normal index build, we use
1583          * SnapshotAny because we must retrieve all tuples and do our own time
1584          * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1585          * concurrent build, we take a regular MVCC snapshot and index whatever's
1586          * live according to that.      During bootstrap we just use SnapshotNow.
1587          */
1588         if (IsBootstrapProcessingMode())
1589         {
1590                 snapshot = SnapshotNow;
1591                 OldestXmin = InvalidTransactionId;              /* not used */
1592         }
1593         else if (indexInfo->ii_Concurrent)
1594         {
1595                 snapshot = RegisterSnapshot(GetTransactionSnapshot());
1596                 OldestXmin = InvalidTransactionId;              /* not used */
1597         }
1598         else
1599         {
1600                 snapshot = SnapshotAny;
1601                 /* okay to ignore lazy VACUUMs here */
1602                 OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
1603         }
1604
1605         scan = heap_beginscan_strat(heapRelation,       /* relation */
1606                                                                 snapshot,               /* snapshot */
1607                                                                 0,              /* number of keys */
1608                                                                 NULL,   /* scan key */
1609                                                                 true,   /* buffer access strategy OK */
1610                                                                 allow_sync);    /* syncscan OK? */
1611
1612         reltuples = 0;
1613
1614         /*
1615          * Scan all tuples in the base relation.
1616          */
1617         while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1618         {
1619                 bool            tupleIsAlive;
1620
1621                 CHECK_FOR_INTERRUPTS();
1622
1623                 /*
1624                  * When dealing with a HOT-chain of updated tuples, we want to index
1625                  * the values of the live tuple (if any), but index it under the TID
1626                  * of the chain's root tuple.  This approach is necessary to preserve
1627                  * the HOT-chain structure in the heap. So we need to be able to find
1628                  * the root item offset for every tuple that's in a HOT-chain.  When
1629                  * first reaching a new page of the relation, call
1630                  * heap_get_root_tuples() to build a map of root item offsets on the
1631                  * page.
1632                  *
1633                  * It might look unsafe to use this information across buffer
1634                  * lock/unlock.  However, we hold ShareLock on the table so no
1635                  * ordinary insert/update/delete should occur; and we hold pin on the
1636                  * buffer continuously while visiting the page, so no pruning
1637                  * operation can occur either.
1638                  *
1639                  * Note the implied assumption that there is no more than one live
1640                  * tuple per HOT-chain ...
1641                  */
1642                 if (scan->rs_cblock != root_blkno)
1643                 {
1644                         Page            page = BufferGetPage(scan->rs_cbuf);
1645
1646                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1647                         heap_get_root_tuples(page, root_offsets);
1648                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1649
1650                         root_blkno = scan->rs_cblock;
1651                 }
1652
1653                 if (snapshot == SnapshotAny)
1654                 {
1655                         /* do our own time qual check */
1656                         bool            indexIt;
1657
1658         recheck:
1659
1660                         /*
1661                          * We could possibly get away with not locking the buffer here,
1662                          * since caller should hold ShareLock on the relation, but let's
1663                          * be conservative about it.  (This remark is still correct even
1664                          * with HOT-pruning: our pin on the buffer prevents pruning.)
1665                          */
1666                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1667
1668                         switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
1669                                                                                          scan->rs_cbuf))
1670                         {
1671                                 case HEAPTUPLE_DEAD:
1672                                         /* Definitely dead, we can ignore it */
1673                                         indexIt = false;
1674                                         tupleIsAlive = false;
1675                                         break;
1676                                 case HEAPTUPLE_LIVE:
1677                                         /* Normal case, index and unique-check it */
1678                                         indexIt = true;
1679                                         tupleIsAlive = true;
1680                                         break;
1681                                 case HEAPTUPLE_RECENTLY_DEAD:
1682
1683                                         /*
1684                                          * If tuple is recently deleted then we must index it
1685                                          * anyway to preserve MVCC semantics.  (Pre-existing
1686                                          * transactions could try to use the index after we finish
1687                                          * building it, and may need to see such tuples.)
1688                                          *
1689                                          * However, if it was HOT-updated then we must only index
1690                                          * the live tuple at the end of the HOT-chain.  Since this
1691                                          * breaks semantics for pre-existing snapshots, mark the
1692                                          * index as unusable for them.
1693                                          *
1694                                          * If we've already decided that the index will be unsafe
1695                                          * for old snapshots, we may as well stop indexing
1696                                          * recently-dead tuples, since there's no longer any
1697                                          * point.
1698                                          */
1699                                         if (HeapTupleIsHotUpdated(heapTuple))
1700                                         {
1701                                                 indexIt = false;
1702                                                 /* mark the index as unsafe for old snapshots */
1703                                                 indexInfo->ii_BrokenHotChain = true;
1704                                         }
1705                                         else if (indexInfo->ii_BrokenHotChain)
1706                                                 indexIt = false;
1707                                         else
1708                                                 indexIt = true;
1709                                         /* In any case, exclude the tuple from unique-checking */
1710                                         tupleIsAlive = false;
1711                                         break;
1712                                 case HEAPTUPLE_INSERT_IN_PROGRESS:
1713
1714                                         /*
1715                                          * Since caller should hold ShareLock or better, we should
1716                                          * not see any tuples inserted by open transactions ---
1717                                          * unless it's our own transaction. (Consider INSERT
1718                                          * followed by CREATE INDEX within a transaction.)      An
1719                                          * exception occurs when reindexing a system catalog,
1720                                          * because we often release lock on system catalogs before
1721                                          * committing.  In that case we wait for the inserting
1722                                          * transaction to finish and check again.  (We could do
1723                                          * that on user tables too, but since the case is not
1724                                          * expected it seems better to throw an error.)
1725                                          */
1726                                         if (!TransactionIdIsCurrentTransactionId(
1727                                                                   HeapTupleHeaderGetXmin(heapTuple->t_data)))
1728                                         {
1729                                                 if (!IsSystemRelation(heapRelation))
1730                                                         elog(ERROR, "concurrent insert in progress");
1731                                                 else
1732                                                 {
1733                                                         /*
1734                                                          * Must drop the lock on the buffer before we wait
1735                                                          */
1736                                                         TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1737
1738                                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1739                                                         XactLockTableWait(xwait);
1740                                                         goto recheck;
1741                                                 }
1742                                         }
1743
1744                                         /*
1745                                          * We must index such tuples, since if the index build
1746                                          * commits then they're good.
1747                                          */
1748                                         indexIt = true;
1749                                         tupleIsAlive = true;
1750                                         break;
1751                                 case HEAPTUPLE_DELETE_IN_PROGRESS:
1752
1753                                         /*
1754                                          * Since caller should hold ShareLock or better, we should
1755                                          * not see any tuples deleted by open transactions ---
1756                                          * unless it's our own transaction. (Consider DELETE
1757                                          * followed by CREATE INDEX within a transaction.)      An
1758                                          * exception occurs when reindexing a system catalog,
1759                                          * because we often release lock on system catalogs before
1760                                          * committing.  In that case we wait for the deleting
1761                                          * transaction to finish and check again.  (We could do
1762                                          * that on user tables too, but since the case is not
1763                                          * expected it seems better to throw an error.)
1764                                          */
1765                                         Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
1766                                         if (!TransactionIdIsCurrentTransactionId(
1767                                                                   HeapTupleHeaderGetXmax(heapTuple->t_data)))
1768                                         {
1769                                                 if (!IsSystemRelation(heapRelation))
1770                                                         elog(ERROR, "concurrent delete in progress");
1771                                                 else
1772                                                 {
1773                                                         /*
1774                                                          * Must drop the lock on the buffer before we wait
1775                                                          */
1776                                                         TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
1777
1778                                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1779                                                         XactLockTableWait(xwait);
1780                                                         goto recheck;
1781                                                 }
1782                                         }
1783
1784                                         /*
1785                                          * Otherwise, we have to treat these tuples just like
1786                                          * RECENTLY_DELETED ones.
1787                                          */
1788                                         if (HeapTupleIsHotUpdated(heapTuple))
1789                                         {
1790                                                 indexIt = false;
1791                                                 /* mark the index as unsafe for old snapshots */
1792                                                 indexInfo->ii_BrokenHotChain = true;
1793                                         }
1794                                         else if (indexInfo->ii_BrokenHotChain)
1795                                                 indexIt = false;
1796                                         else
1797                                                 indexIt = true;
1798                                         /* In any case, exclude the tuple from unique-checking */
1799                                         tupleIsAlive = false;
1800                                         break;
1801                                 default:
1802                                         elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1803                                         indexIt = tupleIsAlive = false;         /* keep compiler quiet */
1804                                         break;
1805                         }
1806
1807                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1808
1809                         if (!indexIt)
1810                                 continue;
1811                 }
1812                 else
1813                 {
1814                         /* heap_getnext did the time qual check */
1815                         tupleIsAlive = true;
1816                 }
1817
1818                 reltuples += 1;
1819
1820                 MemoryContextReset(econtext->ecxt_per_tuple_memory);
1821
1822                 /* Set up for predicate or expression evaluation */
1823                 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1824
1825                 /*
1826                  * In a partial index, discard tuples that don't satisfy the
1827                  * predicate.
1828                  */
1829                 if (predicate != NIL)
1830                 {
1831                         if (!ExecQual(predicate, econtext, false))
1832                                 continue;
1833                 }
1834
1835                 /*
1836                  * For the current heap tuple, extract all the attributes we use in
1837                  * this index, and note which are null.  This also performs evaluation
1838                  * of any expressions needed.
1839                  */
1840                 FormIndexDatum(indexInfo,
1841                                            slot,
1842                                            estate,
1843                                            values,
1844                                            isnull);
1845
1846                 /*
1847                  * You'd think we should go ahead and build the index tuple here, but
1848                  * some index AMs want to do further processing on the data first.      So
1849                  * pass the values[] and isnull[] arrays, instead.
1850                  */
1851
1852                 if (HeapTupleIsHeapOnly(heapTuple))
1853                 {
1854                         /*
1855                          * For a heap-only tuple, pretend its TID is that of the root. See
1856                          * src/backend/access/heap/README.HOT for discussion.
1857                          */
1858                         HeapTupleData rootTuple;
1859                         OffsetNumber offnum;
1860
1861                         rootTuple = *heapTuple;
1862                         offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1863
1864                         Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
1865
1866                         ItemPointerSetOffsetNumber(&rootTuple.t_self,
1867                                                                            root_offsets[offnum - 1]);
1868
1869                         /* Call the AM's callback routine to process the tuple */
1870                         callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
1871                                          callback_state);
1872                 }
1873                 else
1874                 {
1875                         /* Call the AM's callback routine to process the tuple */
1876                         callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
1877                                          callback_state);
1878                 }
1879         }
1880
1881         heap_endscan(scan);
1882
1883         /* we can now forget our snapshot, if set */
1884         if (indexInfo->ii_Concurrent)
1885                 UnregisterSnapshot(snapshot);
1886
1887         ExecDropSingleTupleTableSlot(slot);
1888
1889         FreeExecutorState(estate);
1890
1891         /* These may have been pointing to the now-gone estate */
1892         indexInfo->ii_ExpressionsState = NIL;
1893         indexInfo->ii_PredicateState = NIL;
1894
1895         return reltuples;
1896 }
1897
1898
1899 /*
1900  * validate_index - support code for concurrent index builds
1901  *
1902  * We do a concurrent index build by first inserting the catalog entry for the
1903  * index via index_create(), marking it not indisready and not indisvalid.
1904  * Then we commit our transaction and start a new one, then we wait for all
1905  * transactions that could have been modifying the table to terminate.  Now
1906  * we know that any subsequently-started transactions will see the index and
1907  * honor its constraints on HOT updates; so while existing HOT-chains might
1908  * be broken with respect to the index, no currently live tuple will have an
1909  * incompatible HOT update done to it.  We now build the index normally via
1910  * index_build(), while holding a weak lock that allows concurrent
1911  * insert/update/delete.  Also, we index only tuples that are valid
1912  * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
1913  * build takes care to include recently-dead tuples.  This is OK because
1914  * we won't mark the index valid until all transactions that might be able
1915  * to see those tuples are gone.  The reason for doing that is to avoid
1916  * bogus unique-index failures due to concurrent UPDATEs (we might see
1917  * different versions of the same row as being valid when we pass over them,
1918  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
1919  * does not contain any tuples added to the table while we built the index.
1920  *
1921  * Next, we mark the index "indisready" (but still not "indisvalid") and
1922  * commit the second transaction and start a third.  Again we wait for all
1923  * transactions that could have been modifying the table to terminate.  Now
1924  * we know that any subsequently-started transactions will see the index and
1925  * insert their new tuples into it.  We then take a new reference snapshot
1926  * which is passed to validate_index().  Any tuples that are valid according
1927  * to this snap, but are not in the index, must be added to the index.
1928  * (Any tuples committed live after the snap will be inserted into the
1929  * index by their originating transaction.      Any tuples committed dead before
1930  * the snap need not be indexed, because we will wait out all transactions
1931  * that might care about them before we mark the index valid.)
1932  *
1933  * validate_index() works by first gathering all the TIDs currently in the
1934  * index, using a bulkdelete callback that just stores the TIDs and doesn't
1935  * ever say "delete it".  (This should be faster than a plain indexscan;
1936  * also, not all index AMs support full-index indexscan.)  Then we sort the
1937  * TIDs, and finally scan the table doing a "merge join" against the TID list
1938  * to see which tuples are missing from the index.      Thus we will ensure that
1939  * all tuples valid according to the reference snapshot are in the index.
1940  *
1941  * Building a unique index this way is tricky: we might try to insert a
1942  * tuple that is already dead or is in process of being deleted, and we
1943  * mustn't have a uniqueness failure against an updated version of the same
1944  * row.  We could try to check the tuple to see if it's already dead and tell
1945  * index_insert() not to do the uniqueness check, but that still leaves us
1946  * with a race condition against an in-progress update.  To handle that,
1947  * we expect the index AM to recheck liveness of the to-be-inserted tuple
1948  * before it declares a uniqueness error.
1949  *
1950  * After completing validate_index(), we wait until all transactions that
1951  * were alive at the time of the reference snapshot are gone; this is
1952  * necessary to be sure there are none left with a serializable snapshot
1953  * older than the reference (and hence possibly able to see tuples we did
1954  * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
1955  * transactions will be able to use it for queries.
1956  *
1957  * Doing two full table scans is a brute-force strategy.  We could try to be
1958  * cleverer, eg storing new tuples in a special area of the table (perhaps
1959  * making the table append-only by setting use_fsm).  However that would
1960  * add yet more locking issues.
1961  */
1962 void
1963 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
1964 {
1965         Relation        heapRelation,
1966                                 indexRelation;
1967         IndexInfo  *indexInfo;
1968         IndexVacuumInfo ivinfo;
1969         v_i_state       state;
1970         Oid                     save_userid;
1971         bool            save_secdefcxt;
1972
1973         /* Open and lock the parent heap relation */
1974         heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1975         /* And the target index relation */
1976         indexRelation = index_open(indexId, RowExclusiveLock);
1977
1978         /*
1979          * Fetch info needed for index_insert.  (You might think this should be
1980          * passed in from DefineIndex, but its copy is long gone due to having
1981          * been built in a previous transaction.)
1982          */
1983         indexInfo = BuildIndexInfo(indexRelation);
1984
1985         /* mark build is concurrent just for consistency */
1986         indexInfo->ii_Concurrent = true;
1987
1988         /*
1989          * Switch to the table owner's userid, so that any index functions are run
1990          * as that user.
1991          */
1992         GetUserIdAndContext(&save_userid, &save_secdefcxt);
1993         SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1994
1995         /*
1996          * Scan the index and gather up all the TIDs into a tuplesort object.
1997          */
1998         ivinfo.index = indexRelation;
1999         ivinfo.vacuum_full = false;
2000         ivinfo.analyze_only = false;
2001         ivinfo.estimated_count = true;
2002         ivinfo.message_level = DEBUG2;
2003         ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
2004         ivinfo.strategy = NULL;
2005
2006         state.tuplesort = tuplesort_begin_datum(TIDOID,
2007                                                                                         TIDLessOperator, false,
2008                                                                                         maintenance_work_mem,
2009                                                                                         false);
2010         state.htups = state.itups = state.tups_inserted = 0;
2011
2012         (void) index_bulk_delete(&ivinfo, NULL,
2013                                                          validate_index_callback, (void *) &state);
2014
2015         /* Execute the sort */
2016         tuplesort_performsort(state.tuplesort);
2017
2018         /*
2019          * Now scan the heap and "merge" it with the index
2020          */
2021         validate_index_heapscan(heapRelation,
2022                                                         indexRelation,
2023                                                         indexInfo,
2024                                                         snapshot,
2025                                                         &state);
2026
2027         /* Done with tuplesort object */
2028         tuplesort_end(state.tuplesort);
2029
2030         elog(DEBUG2,
2031                  "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
2032                  state.htups, state.itups, state.tups_inserted);
2033
2034         /* Restore userid */
2035         SetUserIdAndContext(save_userid, save_secdefcxt);
2036
2037         /* Close rels, but keep locks */
2038         index_close(indexRelation, NoLock);
2039         heap_close(heapRelation, NoLock);
2040 }
2041
2042 /*
2043  * validate_index_callback - bulkdelete callback to collect the index TIDs
2044  */
2045 static bool
2046 validate_index_callback(ItemPointer itemptr, void *opaque)
2047 {
2048         v_i_state  *state = (v_i_state *) opaque;
2049
2050         tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
2051         state->itups += 1;
2052         return false;                           /* never actually delete anything */
2053 }
2054
2055 /*
2056  * validate_index_heapscan - second table scan for concurrent index build
2057  *
2058  * This has much code in common with IndexBuildHeapScan, but it's enough
2059  * different that it seems cleaner to have two routines not one.
2060  */
2061 static void
2062 validate_index_heapscan(Relation heapRelation,
2063                                                 Relation indexRelation,
2064                                                 IndexInfo *indexInfo,
2065                                                 Snapshot snapshot,
2066                                                 v_i_state *state)
2067 {
2068         HeapScanDesc scan;
2069         HeapTuple       heapTuple;
2070         Datum           values[INDEX_MAX_KEYS];
2071         bool            isnull[INDEX_MAX_KEYS];
2072         List       *predicate;
2073         TupleTableSlot *slot;
2074         EState     *estate;
2075         ExprContext *econtext;
2076         BlockNumber root_blkno = InvalidBlockNumber;
2077         OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2078         bool            in_index[MaxHeapTuplesPerPage];
2079
2080         /* state variables for the merge */
2081         ItemPointer indexcursor = NULL;
2082         bool            tuplesort_empty = false;
2083
2084         /*
2085          * sanity checks
2086          */
2087         Assert(OidIsValid(indexRelation->rd_rel->relam));
2088
2089         /*
2090          * Need an EState for evaluation of index expressions and partial-index
2091          * predicates.  Also a slot to hold the current tuple.
2092          */
2093         estate = CreateExecutorState();
2094         econtext = GetPerTupleExprContext(estate);
2095         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2096
2097         /* Arrange for econtext's scan tuple to be the tuple under test */
2098         econtext->ecxt_scantuple = slot;
2099
2100         /* Set up execution state for predicate, if any. */
2101         predicate = (List *)
2102                 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2103                                                 estate);
2104
2105         /*
2106          * Prepare for scan of the base relation.  We need just those tuples
2107          * satisfying the passed-in reference snapshot.  We must disable syncscan
2108          * here, because it's critical that we read from block zero forward to
2109          * match the sorted TIDs.
2110          */
2111         scan = heap_beginscan_strat(heapRelation,       /* relation */
2112                                                                 snapshot,               /* snapshot */
2113                                                                 0,              /* number of keys */
2114                                                                 NULL,   /* scan key */
2115                                                                 true,   /* buffer access strategy OK */
2116                                                                 false); /* syncscan not OK */
2117
2118         /*
2119          * Scan all tuples matching the snapshot.
2120          */
2121         while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2122         {
2123                 ItemPointer heapcursor = &heapTuple->t_self;
2124                 ItemPointerData rootTuple;
2125                 OffsetNumber root_offnum;
2126
2127                 CHECK_FOR_INTERRUPTS();
2128
2129                 state->htups += 1;
2130
2131                 /*
2132                  * As commented in IndexBuildHeapScan, we should index heap-only
2133                  * tuples under the TIDs of their root tuples; so when we advance onto
2134                  * a new heap page, build a map of root item offsets on the page.
2135                  *
2136                  * This complicates merging against the tuplesort output: we will
2137                  * visit the live tuples in order by their offsets, but the root
2138                  * offsets that we need to compare against the index contents might be
2139                  * ordered differently.  So we might have to "look back" within the
2140                  * tuplesort output, but only within the current page.  We handle that
2141                  * by keeping a bool array in_index[] showing all the
2142                  * already-passed-over tuplesort output TIDs of the current page. We
2143                  * clear that array here, when advancing onto a new heap page.
2144                  */
2145                 if (scan->rs_cblock != root_blkno)
2146                 {
2147                         Page            page = BufferGetPage(scan->rs_cbuf);
2148
2149                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2150                         heap_get_root_tuples(page, root_offsets);
2151                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2152
2153                         memset(in_index, 0, sizeof(in_index));
2154
2155                         root_blkno = scan->rs_cblock;
2156                 }
2157
2158                 /* Convert actual tuple TID to root TID */
2159                 rootTuple = *heapcursor;
2160                 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
2161
2162                 if (HeapTupleIsHeapOnly(heapTuple))
2163                 {
2164                         root_offnum = root_offsets[root_offnum - 1];
2165                         Assert(OffsetNumberIsValid(root_offnum));
2166                         ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
2167                 }
2168
2169                 /*
2170                  * "merge" by skipping through the index tuples until we find or pass
2171                  * the current root tuple.
2172                  */
2173                 while (!tuplesort_empty &&
2174                            (!indexcursor ||
2175                                 ItemPointerCompare(indexcursor, &rootTuple) < 0))
2176                 {
2177                         Datum           ts_val;
2178                         bool            ts_isnull;
2179
2180                         if (indexcursor)
2181                         {
2182                                 /*
2183                                  * Remember index items seen earlier on the current heap page
2184                                  */
2185                                 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
2186                                         in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
2187                                 pfree(indexcursor);
2188                         }
2189
2190                         tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
2191                                                                                                   &ts_val, &ts_isnull);
2192                         Assert(tuplesort_empty || !ts_isnull);
2193                         indexcursor = (ItemPointer) DatumGetPointer(ts_val);
2194                 }
2195
2196                 /*
2197                  * If the tuplesort has overshot *and* we didn't see a match earlier,
2198                  * then this tuple is missing from the index, so insert it.
2199                  */
2200                 if ((tuplesort_empty ||
2201                          ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
2202                         !in_index[root_offnum - 1])
2203                 {
2204                         MemoryContextReset(econtext->ecxt_per_tuple_memory);
2205
2206                         /* Set up for predicate or expression evaluation */
2207                         ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2208
2209                         /*
2210                          * In a partial index, discard tuples that don't satisfy the
2211                          * predicate.
2212                          */
2213                         if (predicate != NIL)
2214                         {
2215                                 if (!ExecQual(predicate, econtext, false))
2216                                         continue;
2217                         }
2218
2219                         /*
2220                          * For the current heap tuple, extract all the attributes we use
2221                          * in this index, and note which are null.      This also performs
2222                          * evaluation of any expressions needed.
2223                          */
2224                         FormIndexDatum(indexInfo,
2225                                                    slot,
2226                                                    estate,
2227                                                    values,
2228                                                    isnull);
2229
2230                         /*
2231                          * You'd think we should go ahead and build the index tuple here,
2232                          * but some index AMs want to do further processing on the data
2233                          * first. So pass the values[] and isnull[] arrays, instead.
2234                          */
2235
2236                         /*
2237                          * If the tuple is already committed dead, you might think we
2238                          * could suppress uniqueness checking, but this is no longer true
2239                          * in the presence of HOT, because the insert is actually a proxy
2240                          * for a uniqueness check on the whole HOT-chain.  That is, the
2241                          * tuple we have here could be dead because it was already
2242                          * HOT-updated, and if so the updating transaction will not have
2243                          * thought it should insert index entries.      The index AM will
2244                          * check the whole HOT-chain and correctly detect a conflict if
2245                          * there is one.
2246                          */
2247
2248                         index_insert(indexRelation,
2249                                                  values,
2250                                                  isnull,
2251                                                  &rootTuple,
2252                                                  heapRelation,
2253                                                  indexInfo->ii_Unique ?
2254                                                  UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
2255
2256                         state->tups_inserted += 1;
2257                 }
2258         }
2259
2260         heap_endscan(scan);
2261
2262         ExecDropSingleTupleTableSlot(slot);
2263
2264         FreeExecutorState(estate);
2265
2266         /* These may have been pointing to the now-gone estate */
2267         indexInfo->ii_ExpressionsState = NIL;
2268         indexInfo->ii_PredicateState = NIL;
2269 }
2270
2271
2272 /*
2273  * IndexGetRelation: given an index's relation OID, get the OID of the
2274  * relation it is an index on.  Uses the system cache.
2275  */
2276 static Oid
2277 IndexGetRelation(Oid indexId)
2278 {
2279         HeapTuple       tuple;
2280         Form_pg_index index;
2281         Oid                     result;
2282
2283         tuple = SearchSysCache(INDEXRELID,
2284                                                    ObjectIdGetDatum(indexId),
2285                                                    0, 0, 0);
2286         if (!HeapTupleIsValid(tuple))
2287                 elog(ERROR, "cache lookup failed for index %u", indexId);
2288         index = (Form_pg_index) GETSTRUCT(tuple);
2289         Assert(index->indexrelid == indexId);
2290
2291         result = index->indrelid;
2292         ReleaseSysCache(tuple);
2293         return result;
2294 }
2295
2296 /*
2297  * reindex_index - This routine is used to recreate a single index
2298  */
2299 void
2300 reindex_index(Oid indexId)
2301 {
2302         Relation        iRel,
2303                                 heapRelation,
2304                                 pg_index;
2305         Oid                     heapId;
2306         bool            inplace;
2307         IndexInfo  *indexInfo;
2308         HeapTuple       indexTuple;
2309         Form_pg_index indexForm;
2310
2311         /*
2312          * Open and lock the parent heap relation.      ShareLock is sufficient since
2313          * we only need to be sure no schema or data changes are going on.
2314          */
2315         heapId = IndexGetRelation(indexId);
2316         heapRelation = heap_open(heapId, ShareLock);
2317
2318         /*
2319          * Open the target index relation and get an exclusive lock on it, to
2320          * ensure that no one else is touching this particular index.
2321          */
2322         iRel = index_open(indexId, AccessExclusiveLock);
2323
2324         /*
2325          * Don't allow reindex on temp tables of other backends ... their local
2326          * buffer manager is not going to cope.
2327          */
2328         if (RELATION_IS_OTHER_TEMP(iRel))
2329                 ereport(ERROR,
2330                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2331                            errmsg("cannot reindex temporary tables of other sessions")));
2332
2333         /*
2334          * Also check for active uses of the index in the current transaction; we
2335          * don't want to reindex underneath an open indexscan.
2336          */
2337         CheckTableNotInUse(iRel, "REINDEX INDEX");
2338
2339         /*
2340          * If it's a shared index, we must do inplace processing (because we have
2341          * no way to update relfilenode in other databases).  Otherwise we can do
2342          * it the normal transaction-safe way.
2343          *
2344          * Since inplace processing isn't crash-safe, we only allow it in a
2345          * standalone backend.  (In the REINDEX TABLE and REINDEX DATABASE cases,
2346          * the caller should have detected this.)
2347          */
2348         inplace = iRel->rd_rel->relisshared;
2349
2350         if (inplace && IsUnderPostmaster)
2351                 ereport(ERROR,
2352                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2353                                  errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
2354                                                 RelationGetRelationName(iRel))));
2355
2356         PG_TRY();
2357         {
2358                 /* Suppress use of the target index while rebuilding it */
2359                 SetReindexProcessing(heapId, indexId);
2360
2361                 /* Fetch info needed for index_build */
2362                 indexInfo = BuildIndexInfo(iRel);
2363
2364                 if (inplace)
2365                 {
2366                         /*
2367                          * Truncate the actual file (and discard buffers).
2368                          */
2369                         RelationTruncate(iRel, 0);
2370                 }
2371                 else
2372                 {
2373                         /*
2374                          * We'll build a new physical relation for the index.
2375                          */
2376                         setNewRelfilenode(iRel, InvalidTransactionId);
2377                 }
2378
2379                 /* Initialize the index and rebuild */
2380                 /* Note: we do not need to re-establish pkey setting */
2381                 index_build(heapRelation, iRel, indexInfo, false);
2382         }
2383         PG_CATCH();
2384         {
2385                 /* Make sure flag gets cleared on error exit */
2386                 ResetReindexProcessing();
2387                 PG_RE_THROW();
2388         }
2389         PG_END_TRY();
2390         ResetReindexProcessing();
2391
2392         /*
2393          * If the index is marked invalid or not ready (ie, it's from a failed
2394          * CREATE INDEX CONCURRENTLY), we can now mark it valid.  This allows
2395          * REINDEX to be used to clean up in such cases.
2396          *
2397          * We can also reset indcheckxmin, because we have now done a
2398          * non-concurrent index build, *except* in the case where index_build
2399          * found some still-broken HOT chains.
2400          */
2401         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2402
2403         indexTuple = SearchSysCacheCopy(INDEXRELID,
2404                                                                         ObjectIdGetDatum(indexId),
2405                                                                         0, 0, 0);
2406         if (!HeapTupleIsValid(indexTuple))
2407                 elog(ERROR, "cache lookup failed for index %u", indexId);
2408         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2409
2410         if (!indexForm->indisvalid || !indexForm->indisready ||
2411                 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
2412         {
2413                 indexForm->indisvalid = true;
2414                 indexForm->indisready = true;
2415                 if (!indexInfo->ii_BrokenHotChain)
2416                         indexForm->indcheckxmin = false;
2417                 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2418                 CatalogUpdateIndexes(pg_index, indexTuple);
2419         }
2420         heap_close(pg_index, RowExclusiveLock);
2421
2422         /* Close rels, but keep locks */
2423         index_close(iRel, NoLock);
2424         heap_close(heapRelation, NoLock);
2425 }
2426
2427 /*
2428  * reindex_relation - This routine is used to recreate all indexes
2429  * of a relation (and optionally its toast relation too, if any).
2430  *
2431  * Returns true if any indexes were rebuilt.  Note that a
2432  * CommandCounterIncrement will occur after each index rebuild.
2433  */
2434 bool
2435 reindex_relation(Oid relid, bool toast_too)
2436 {
2437         Relation        rel;
2438         Oid                     toast_relid;
2439         bool            is_pg_class;
2440         bool            result;
2441         List       *indexIds,
2442                            *doneIndexes;
2443         ListCell   *indexId;
2444
2445         /*
2446          * Open and lock the relation.  ShareLock is sufficient since we only need
2447          * to prevent schema and data changes in it.
2448          */
2449         rel = heap_open(relid, ShareLock);
2450
2451         toast_relid = rel->rd_rel->reltoastrelid;
2452
2453         /*
2454          * Get the list of index OIDs for this relation.  (We trust to the
2455          * relcache to get this with a sequential scan if ignoring system
2456          * indexes.)
2457          */
2458         indexIds = RelationGetIndexList(rel);
2459
2460         /*
2461          * reindex_index will attempt to update the pg_class rows for the relation
2462          * and index.  If we are processing pg_class itself, we want to make sure
2463          * that the updates do not try to insert index entries into indexes we
2464          * have not processed yet.      (When we are trying to recover from corrupted
2465          * indexes, that could easily cause a crash.) We can accomplish this
2466          * because CatalogUpdateIndexes will use the relcache's index list to know
2467          * which indexes to update. We just force the index list to be only the
2468          * stuff we've processed.
2469          *
2470          * It is okay to not insert entries into the indexes we have not processed
2471          * yet because all of this is transaction-safe.  If we fail partway
2472          * through, the updated rows are dead and it doesn't matter whether they
2473          * have index entries.  Also, a new pg_class index will be created with an
2474          * entry for its own pg_class row because we do setNewRelfilenode() before
2475          * we do index_build().
2476          *
2477          * Note that we also clear pg_class's rd_oidindex until the loop is done,
2478          * so that that index can't be accessed either.  This means we cannot
2479          * safely generate new relation OIDs while in the loop; shouldn't be a
2480          * problem.
2481          */
2482         is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
2483
2484         /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
2485         if (is_pg_class)
2486                 (void) RelationGetIndexAttrBitmap(rel);
2487
2488         /* Reindex all the indexes. */
2489         doneIndexes = NIL;
2490         foreach(indexId, indexIds)
2491         {
2492                 Oid                     indexOid = lfirst_oid(indexId);
2493
2494                 if (is_pg_class)
2495                         RelationSetIndexList(rel, doneIndexes, InvalidOid);
2496
2497                 reindex_index(indexOid);
2498
2499                 CommandCounterIncrement();
2500
2501                 if (is_pg_class)
2502                         doneIndexes = lappend_oid(doneIndexes, indexOid);
2503         }
2504
2505         if (is_pg_class)
2506                 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
2507
2508         /*
2509          * Close rel, but continue to hold the lock.
2510          */
2511         heap_close(rel, NoLock);
2512
2513         result = (indexIds != NIL);
2514
2515         /*
2516          * If the relation has a secondary toast rel, reindex that too while we
2517          * still hold the lock on the master table.
2518          */
2519         if (toast_too && OidIsValid(toast_relid))
2520                 result |= reindex_relation(toast_relid, false);
2521
2522         return result;
2523 }