granicus.if.org Git - postgresql/blob - src/backend/commands/analyze.c
Use abbreviated keys for faster sorting of text datums.
[postgresql] / src / backend / commands / analyze.c
1 /*-------------------------------------------------------------------------
2  *
3  * analyze.c
4  *        the Postgres statistics generator
5  *
6  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/commands/analyze.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <math.h>
18
19 #include "access/multixact.h"
20 #include "access/transam.h"
21 #include "access/tupconvert.h"
22 #include "access/tuptoaster.h"
23 #include "access/visibilitymap.h"
24 #include "access/xact.h"
25 #include "catalog/catalog.h"
26 #include "catalog/index.h"
27 #include "catalog/indexing.h"
28 #include "catalog/pg_collation.h"
29 #include "catalog/pg_inherits_fn.h"
30 #include "catalog/pg_namespace.h"
31 #include "commands/dbcommands.h"
32 #include "commands/tablecmds.h"
33 #include "commands/vacuum.h"
34 #include "executor/executor.h"
35 #include "foreign/fdwapi.h"
36 #include "miscadmin.h"
37 #include "nodes/nodeFuncs.h"
38 #include "parser/parse_oper.h"
39 #include "parser/parse_relation.h"
40 #include "pgstat.h"
41 #include "postmaster/autovacuum.h"
42 #include "storage/bufmgr.h"
43 #include "storage/lmgr.h"
44 #include "storage/proc.h"
45 #include "storage/procarray.h"
46 #include "utils/acl.h"
47 #include "utils/attoptcache.h"
48 #include "utils/datum.h"
49 #include "utils/guc.h"
50 #include "utils/lsyscache.h"
51 #include "utils/memutils.h"
52 #include "utils/pg_rusage.h"
53 #include "utils/sortsupport.h"
54 #include "utils/syscache.h"
55 #include "utils/timestamp.h"
56 #include "utils/tqual.h"
57
58
59 /* Data structure for Algorithm S from Knuth 3.4.2 */
60 typedef struct
61 {
62         BlockNumber N;                          /* number of blocks, known in advance */
63         int                     n;                              /* desired sample size */
64         BlockNumber t;                          /* current block number */
65         int                     m;                              /* blocks selected so far */
66 } BlockSamplerData;
67
68 typedef BlockSamplerData *BlockSampler;
69
70 /* Per-index data for ANALYZE */
71 typedef struct AnlIndexData
72 {
73         IndexInfo  *indexInfo;          /* BuildIndexInfo result */
74         double          tupleFract;             /* fraction of rows for partial index */
75         VacAttrStats **vacattrstats;    /* index attrs to analyze */
76         int                     attr_cnt;
77 } AnlIndexData;
78
79
80 /* Default statistics target (GUC parameter) */
81 int                     default_statistics_target = 100;
82
83 /* A few variables that don't seem worth passing around as parameters */
84 static MemoryContext anl_context = NULL;
85 static BufferAccessStrategy vac_strategy;
86
87
88 static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
89                            AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
90                            bool inh, bool in_outer_xact, int elevel);
91 static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
92                                   int samplesize);
93 static bool BlockSampler_HasMore(BlockSampler bs);
94 static BlockNumber BlockSampler_Next(BlockSampler bs);
95 static void compute_index_stats(Relation onerel, double totalrows,
96                                         AnlIndexData *indexdata, int nindexes,
97                                         HeapTuple *rows, int numrows,
98                                         MemoryContext col_context);
99 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
100                                   Node *index_expr);
101 static int acquire_sample_rows(Relation onerel, int elevel,
102                                         HeapTuple *rows, int targrows,
103                                         double *totalrows, double *totaldeadrows);
104 static int      compare_rows(const void *a, const void *b);
105 static int acquire_inherited_sample_rows(Relation onerel, int elevel,
106                                                           HeapTuple *rows, int targrows,
107                                                           double *totalrows, double *totaldeadrows);
108 static void update_attstats(Oid relid, bool inh,
109                                 int natts, VacAttrStats **vacattrstats);
110 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
111 static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
112
113
114 /*
115  *      analyze_rel() -- analyze one relation
116  */
117 void
118 analyze_rel(Oid relid, VacuumStmt *vacstmt,
119                         bool in_outer_xact, BufferAccessStrategy bstrategy)
120 {
121         Relation        onerel;
122         int                     elevel;
123         AcquireSampleRowsFunc acquirefunc = NULL;
124         BlockNumber relpages = 0;
125
126         /* Select logging level */
127         if (vacstmt->options & VACOPT_VERBOSE)
128                 elevel = INFO;
129         else
130                 elevel = DEBUG2;
131
132         /* Set up static variables */
133         vac_strategy = bstrategy;
134
135         /*
136          * Check for user-requested abort.
137          */
138         CHECK_FOR_INTERRUPTS();
139
140         /*
141          * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
142          * ANALYZEs don't run on it concurrently.  (This also locks out a
143          * concurrent VACUUM, which doesn't matter much at the moment but might
144          * matter if we ever try to accumulate stats on dead tuples.) If the rel
145          * has been dropped since we last saw it, we don't need to process it.
146          */
147         if (!(vacstmt->options & VACOPT_NOWAIT))
148                 onerel = try_relation_open(relid, ShareUpdateExclusiveLock);
149         else if (ConditionalLockRelationOid(relid, ShareUpdateExclusiveLock))
150                 onerel = try_relation_open(relid, NoLock);
151         else
152         {
153                 onerel = NULL;
154                 if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
155                         ereport(LOG,
156                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
157                                   errmsg("skipping analyze of \"%s\" --- lock not available",
158                                                  vacstmt->relation->relname)));
159         }
160         if (!onerel)
161                 return;
162
163         /*
164          * Check permissions --- this should match vacuum's check!
165          */
166         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
167                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
168         {
169                 /* No need for a WARNING if we already complained during VACUUM */
170                 if (!(vacstmt->options & VACOPT_VACUUM))
171                 {
172                         if (onerel->rd_rel->relisshared)
173                                 ereport(WARNING,
174                                  (errmsg("skipping \"%s\" --- only superuser can analyze it",
175                                                  RelationGetRelationName(onerel))));
176                         else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
177                                 ereport(WARNING,
178                                                 (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
179                                                                 RelationGetRelationName(onerel))));
180                         else
181                                 ereport(WARNING,
182                                                 (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
183                                                                 RelationGetRelationName(onerel))));
184                 }
185                 relation_close(onerel, ShareUpdateExclusiveLock);
186                 return;
187         }
188
189         /*
190          * Silently ignore tables that are temp tables of other backends ---
191          * trying to analyze these is rather pointless, since their contents are
192          * probably not up-to-date on disk.  (We don't throw a warning here; it
193          * would just lead to chatter during a database-wide ANALYZE.)
194          */
195         if (RELATION_IS_OTHER_TEMP(onerel))
196         {
197                 relation_close(onerel, ShareUpdateExclusiveLock);
198                 return;
199         }
200
201         /*
202          * We can ANALYZE any table except pg_statistic. See update_attstats
203          */
204         if (RelationGetRelid(onerel) == StatisticRelationId)
205         {
206                 relation_close(onerel, ShareUpdateExclusiveLock);
207                 return;
208         }
209
210         /*
211          * Check that it's a plain table, materialized view, or foreign table; we
212          * used to do this in get_rel_oids() but seems safer to check after we've
213          * locked the relation.
214          */
215         if (onerel->rd_rel->relkind == RELKIND_RELATION ||
216                 onerel->rd_rel->relkind == RELKIND_MATVIEW)
217         {
218                 /* Regular table, so we'll use the regular row acquisition function */
219                 acquirefunc = acquire_sample_rows;
220                 /* Also get regular table's size */
221                 relpages = RelationGetNumberOfBlocks(onerel);
222         }
223         else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
224         {
225                 /*
226                  * For a foreign table, call the FDW's hook function to see whether it
227                  * supports analysis.
228                  */
229                 FdwRoutine *fdwroutine;
230                 bool            ok = false;
231
232                 fdwroutine = GetFdwRoutineForRelation(onerel, false);
233
234                 if (fdwroutine->AnalyzeForeignTable != NULL)
235                         ok = fdwroutine->AnalyzeForeignTable(onerel,
236                                                                                                  &acquirefunc,
237                                                                                                  &relpages);
238
239                 if (!ok)
240                 {
241                         ereport(WARNING,
242                          (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
243                                          RelationGetRelationName(onerel))));
244                         relation_close(onerel, ShareUpdateExclusiveLock);
245                         return;
246                 }
247         }
248         else
249         {
250                 /* No need for a WARNING if we already complained during VACUUM */
251                 if (!(vacstmt->options & VACOPT_VACUUM))
252                         ereport(WARNING,
253                                         (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
254                                                         RelationGetRelationName(onerel))));
255                 relation_close(onerel, ShareUpdateExclusiveLock);
256                 return;
257         }
258
259         /*
260          * OK, let's do it.  First let other backends know I'm in ANALYZE.
261          */
262         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
263         MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
264         LWLockRelease(ProcArrayLock);
265
266         /*
267          * Do the normal non-recursive ANALYZE.
268          */
269         do_analyze_rel(onerel, vacstmt, acquirefunc, relpages,
270                                    false, in_outer_xact, elevel);
271
272         /*
273          * If there are child tables, do recursive ANALYZE.
274          */
275         if (onerel->rd_rel->relhassubclass)
276                 do_analyze_rel(onerel, vacstmt, acquirefunc, relpages,
277                                            true, in_outer_xact, elevel);
278
279         /*
280          * Close source relation now, but keep lock so that no one deletes it
281          * before we commit.  (If someone did, they'd fail to clean up the entries
282          * we made in pg_statistic.  Also, releasing the lock before commit would
283          * expose us to concurrent-update failures in update_attstats.)
284          */
285         relation_close(onerel, NoLock);
286
287         /*
288          * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
289          * because the vacuum flag is cleared by the end-of-xact code.
290          */
291         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
292         MyPgXact->vacuumFlags &= ~PROC_IN_ANALYZE;
293         LWLockRelease(ProcArrayLock);
294 }
295
296 /*
297  *      do_analyze_rel() -- analyze one relation, recursively or not
298  *
299  * Note that "acquirefunc" is only relevant for the non-inherited case.
300  * If we supported foreign tables in inheritance trees,
301  * acquire_inherited_sample_rows would need to determine the appropriate
302  * acquirefunc for each child table.
303  */
304 static void
305 do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
306                            AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
307                            bool inh, bool in_outer_xact, int elevel)
308 {
309         int                     attr_cnt,
310                                 tcnt,
311                                 i,
312                                 ind;
313         Relation   *Irel;
314         int                     nindexes;
315         bool            hasindex;
316         VacAttrStats **vacattrstats;
317         AnlIndexData *indexdata;
318         int                     targrows,
319                                 numrows;
320         double          totalrows,
321                                 totaldeadrows;
322         HeapTuple  *rows;
323         PGRUsage        ru0;
324         TimestampTz starttime = 0;
325         MemoryContext caller_context;
326         Oid                     save_userid;
327         int                     save_sec_context;
328         int                     save_nestlevel;
329
330         if (inh)
331                 ereport(elevel,
332                                 (errmsg("analyzing \"%s.%s\" inheritance tree",
333                                                 get_namespace_name(RelationGetNamespace(onerel)),
334                                                 RelationGetRelationName(onerel))));
335         else
336                 ereport(elevel,
337                                 (errmsg("analyzing \"%s.%s\"",
338                                                 get_namespace_name(RelationGetNamespace(onerel)),
339                                                 RelationGetRelationName(onerel))));
340
341         /*
342          * Set up a working context so that we can easily free whatever junk gets
343          * created.
344          */
345         anl_context = AllocSetContextCreate(CurrentMemoryContext,
346                                                                                 "Analyze",
347                                                                                 ALLOCSET_DEFAULT_MINSIZE,
348                                                                                 ALLOCSET_DEFAULT_INITSIZE,
349                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
350         caller_context = MemoryContextSwitchTo(anl_context);
351
352         /*
353          * Switch to the table owner's userid, so that any index functions are run
354          * as that user.  Also lock down security-restricted operations and
355          * arrange to make GUC variable changes local to this command.
356          */
357         GetUserIdAndSecContext(&save_userid, &save_sec_context);
358         SetUserIdAndSecContext(onerel->rd_rel->relowner,
359                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
360         save_nestlevel = NewGUCNestLevel();
361
362         /* measure elapsed time iff autovacuum logging requires it */
363         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
364         {
365                 pg_rusage_init(&ru0);
366                 if (Log_autovacuum_min_duration > 0)
367                         starttime = GetCurrentTimestamp();
368         }
369
370         /*
371          * Determine which columns to analyze
372          *
373          * Note that system attributes are never analyzed.
374          */
375         if (vacstmt->va_cols != NIL)
376         {
377                 ListCell   *le;
378
379                 vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
380                                                                                                 sizeof(VacAttrStats *));
381                 tcnt = 0;
382                 foreach(le, vacstmt->va_cols)
383                 {
384                         char       *col = strVal(lfirst(le));
385
386                         i = attnameAttNum(onerel, col, false);
387                         if (i == InvalidAttrNumber)
388                                 ereport(ERROR,
389                                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
390                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
391                                                    col, RelationGetRelationName(onerel))));
392                         vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
393                         if (vacattrstats[tcnt] != NULL)
394                                 tcnt++;
395                 }
396                 attr_cnt = tcnt;
397         }
398         else
399         {
400                 attr_cnt = onerel->rd_att->natts;
401                 vacattrstats = (VacAttrStats **)
402                         palloc(attr_cnt * sizeof(VacAttrStats *));
403                 tcnt = 0;
404                 for (i = 1; i <= attr_cnt; i++)
405                 {
406                         vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
407                         if (vacattrstats[tcnt] != NULL)
408                                 tcnt++;
409                 }
410                 attr_cnt = tcnt;
411         }
412
413         /*
414          * Open all indexes of the relation, and see if there are any analyzable
415          * columns in the indexes.  We do not analyze index columns if there was
416          * an explicit column list in the ANALYZE command, however.  If we are
417          * doing a recursive scan, we don't want to touch the parent's indexes at
418          * all.
419          */
420         if (!inh)
421                 vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
422         else
423         {
424                 Irel = NULL;
425                 nindexes = 0;
426         }
427         hasindex = (nindexes > 0);
428         indexdata = NULL;
429         if (hasindex)
430         {
431                 indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
432                 for (ind = 0; ind < nindexes; ind++)
433                 {
434                         AnlIndexData *thisdata = &indexdata[ind];
435                         IndexInfo  *indexInfo;
436
437                         thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
438                         thisdata->tupleFract = 1.0; /* fix later if partial */
439                         if (indexInfo->ii_Expressions != NIL && vacstmt->va_cols == NIL)
440                         {
441                                 ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
442
443                                 thisdata->vacattrstats = (VacAttrStats **)
444                                         palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
445                                 tcnt = 0;
446                                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
447                                 {
448                                         int                     keycol = indexInfo->ii_KeyAttrNumbers[i];
449
450                                         if (keycol == 0)
451                                         {
452                                                 /* Found an index expression */
453                                                 Node       *indexkey;
454
455                                                 if (indexpr_item == NULL)               /* shouldn't happen */
456                                                         elog(ERROR, "too few entries in indexprs list");
457                                                 indexkey = (Node *) lfirst(indexpr_item);
458                                                 indexpr_item = lnext(indexpr_item);
459                                                 thisdata->vacattrstats[tcnt] =
460                                                         examine_attribute(Irel[ind], i + 1, indexkey);
461                                                 if (thisdata->vacattrstats[tcnt] != NULL)
462                                                         tcnt++;
463                                         }
464                                 }
465                                 thisdata->attr_cnt = tcnt;
466                         }
467                 }
468         }
469
470         /*
471          * Determine how many rows we need to sample, using the worst case from
472          * all analyzable columns.  We use a lower bound of 100 rows to avoid
473          * possible overflow in Vitter's algorithm.  (Note: that will also be the
474          * target in the corner case where there are no analyzable columns.)
475          */
476         targrows = 100;
477         for (i = 0; i < attr_cnt; i++)
478         {
479                 if (targrows < vacattrstats[i]->minrows)
480                         targrows = vacattrstats[i]->minrows;
481         }
482         for (ind = 0; ind < nindexes; ind++)
483         {
484                 AnlIndexData *thisdata = &indexdata[ind];
485
486                 for (i = 0; i < thisdata->attr_cnt; i++)
487                 {
488                         if (targrows < thisdata->vacattrstats[i]->minrows)
489                                 targrows = thisdata->vacattrstats[i]->minrows;
490                 }
491         }
492
493         /*
494          * Acquire the sample rows
495          */
496         rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
497         if (inh)
498                 numrows = acquire_inherited_sample_rows(onerel, elevel,
499                                                                                                 rows, targrows,
500                                                                                                 &totalrows, &totaldeadrows);
501         else
502                 numrows = (*acquirefunc) (onerel, elevel,
503                                                                   rows, targrows,
504                                                                   &totalrows, &totaldeadrows);
505
506         /*
507          * Compute the statistics.  Temporary results during the calculations for
508          * each column are stored in a child context.  The calc routines are
509          * responsible to make sure that whatever they store into the VacAttrStats
510          * structure is allocated in anl_context.
511          */
512         if (numrows > 0)
513         {
514                 MemoryContext col_context,
515                                         old_context;
516
517                 col_context = AllocSetContextCreate(anl_context,
518                                                                                         "Analyze Column",
519                                                                                         ALLOCSET_DEFAULT_MINSIZE,
520                                                                                         ALLOCSET_DEFAULT_INITSIZE,
521                                                                                         ALLOCSET_DEFAULT_MAXSIZE);
522                 old_context = MemoryContextSwitchTo(col_context);
523
524                 for (i = 0; i < attr_cnt; i++)
525                 {
526                         VacAttrStats *stats = vacattrstats[i];
527                         AttributeOpts *aopt;
528
529                         stats->rows = rows;
530                         stats->tupDesc = onerel->rd_att;
531                         (*stats->compute_stats) (stats,
532                                                                          std_fetch_func,
533                                                                          numrows,
534                                                                          totalrows);
535
536                         /*
537                          * If the appropriate flavor of the n_distinct option is
538                          * specified, override with the corresponding value.
539                          */
540                         aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
541                         if (aopt != NULL)
542                         {
543                                 float8          n_distinct;
544
545                                 n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
546                                 if (n_distinct != 0.0)
547                                         stats->stadistinct = n_distinct;
548                         }
549
550                         MemoryContextResetAndDeleteChildren(col_context);
551                 }
552
553                 if (hasindex)
554                         compute_index_stats(onerel, totalrows,
555                                                                 indexdata, nindexes,
556                                                                 rows, numrows,
557                                                                 col_context);
558
559                 MemoryContextSwitchTo(old_context);
560                 MemoryContextDelete(col_context);
561
562                 /*
563                  * Emit the completed stats rows into pg_statistic, replacing any
564                  * previous statistics for the target columns.  (If there are stats in
565                  * pg_statistic for columns we didn't process, we leave them alone.)
566                  */
567                 update_attstats(RelationGetRelid(onerel), inh,
568                                                 attr_cnt, vacattrstats);
569
570                 for (ind = 0; ind < nindexes; ind++)
571                 {
572                         AnlIndexData *thisdata = &indexdata[ind];
573
574                         update_attstats(RelationGetRelid(Irel[ind]), false,
575                                                         thisdata->attr_cnt, thisdata->vacattrstats);
576                 }
577         }
578
579         /*
580          * Update pages/tuples stats in pg_class ... but not if we're doing
581          * inherited stats.
582          */
583         if (!inh)
584                 vac_update_relstats(onerel,
585                                                         relpages,
586                                                         totalrows,
587                                                         visibilitymap_count(onerel),
588                                                         hasindex,
589                                                         InvalidTransactionId,
590                                                         InvalidMultiXactId,
591                                                         in_outer_xact);
592
593         /*
594          * Same for indexes. Vacuum always scans all indexes, so if we're part of
595          * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
596          * VACUUM.
597          */
598         if (!inh && !(vacstmt->options & VACOPT_VACUUM))
599         {
600                 for (ind = 0; ind < nindexes; ind++)
601                 {
602                         AnlIndexData *thisdata = &indexdata[ind];
603                         double          totalindexrows;
604
605                         totalindexrows = ceil(thisdata->tupleFract * totalrows);
606                         vac_update_relstats(Irel[ind],
607                                                                 RelationGetNumberOfBlocks(Irel[ind]),
608                                                                 totalindexrows,
609                                                                 0,
610                                                                 false,
611                                                                 InvalidTransactionId,
612                                                                 InvalidMultiXactId,
613                                                                 in_outer_xact);
614                 }
615         }
616
617         /*
618          * Report ANALYZE to the stats collector, too.  However, if doing
619          * inherited stats we shouldn't report, because the stats collector only
620          * tracks per-table stats.
621          */
622         if (!inh)
623                 pgstat_report_analyze(onerel, totalrows, totaldeadrows);
624
625         /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
626         if (!(vacstmt->options & VACOPT_VACUUM))
627         {
628                 for (ind = 0; ind < nindexes; ind++)
629                 {
630                         IndexBulkDeleteResult *stats;
631                         IndexVacuumInfo ivinfo;
632
633                         ivinfo.index = Irel[ind];
634                         ivinfo.analyze_only = true;
635                         ivinfo.estimated_count = true;
636                         ivinfo.message_level = elevel;
637                         ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
638                         ivinfo.strategy = vac_strategy;
639
640                         stats = index_vacuum_cleanup(&ivinfo, NULL);
641
642                         if (stats)
643                                 pfree(stats);
644                 }
645         }
646
647         /* Done with indexes */
648         vac_close_indexes(nindexes, Irel, NoLock);
649
650         /* Log the action if appropriate */
651         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
652         {
653                 if (Log_autovacuum_min_duration == 0 ||
654                         TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
655                                                                            Log_autovacuum_min_duration))
656                         ereport(LOG,
657                                         (errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
658                                                         get_database_name(MyDatabaseId),
659                                                         get_namespace_name(RelationGetNamespace(onerel)),
660                                                         RelationGetRelationName(onerel),
661                                                         pg_rusage_show(&ru0))));
662         }
663
664         /* Roll back any GUC changes executed by index functions */
665         AtEOXact_GUC(false, save_nestlevel);
666
667         /* Restore userid and security context */
668         SetUserIdAndSecContext(save_userid, save_sec_context);
669
670         /* Restore current context and release memory */
671         MemoryContextSwitchTo(caller_context);
672         MemoryContextDelete(anl_context);
673         anl_context = NULL;
674 }
675
676 /*
677  * Compute statistics about indexes of a relation
678  */
679 static void
680 compute_index_stats(Relation onerel, double totalrows,
681                                         AnlIndexData *indexdata, int nindexes,
682                                         HeapTuple *rows, int numrows,
683                                         MemoryContext col_context)
684 {
685         MemoryContext ind_context,
686                                 old_context;
687         Datum           values[INDEX_MAX_KEYS];
688         bool            isnull[INDEX_MAX_KEYS];
689         int                     ind,
690                                 i;
691
692         ind_context = AllocSetContextCreate(anl_context,
693                                                                                 "Analyze Index",
694                                                                                 ALLOCSET_DEFAULT_MINSIZE,
695                                                                                 ALLOCSET_DEFAULT_INITSIZE,
696                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
697         old_context = MemoryContextSwitchTo(ind_context);
698
699         for (ind = 0; ind < nindexes; ind++)
700         {
701                 AnlIndexData *thisdata = &indexdata[ind];
702                 IndexInfo  *indexInfo = thisdata->indexInfo;
703                 int                     attr_cnt = thisdata->attr_cnt;
704                 TupleTableSlot *slot;
705                 EState     *estate;
706                 ExprContext *econtext;
707                 List       *predicate;
708                 Datum      *exprvals;
709                 bool       *exprnulls;
710                 int                     numindexrows,
711                                         tcnt,
712                                         rowno;
713                 double          totalindexrows;
714
715                 /* Ignore index if no columns to analyze and not partial */
716                 if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
717                         continue;
718
719                 /*
720                  * Need an EState for evaluation of index expressions and
721                  * partial-index predicates.  Create it in the per-index context to be
722                  * sure it gets cleaned up at the bottom of the loop.
723                  */
724                 estate = CreateExecutorState();
725                 econtext = GetPerTupleExprContext(estate);
726                 /* Need a slot to hold the current heap tuple, too */
727                 slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel));
728
729                 /* Arrange for econtext's scan tuple to be the tuple under test */
730                 econtext->ecxt_scantuple = slot;
731
732                 /* Set up execution state for predicate. */
733                 predicate = (List *)
734                         ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
735                                                         estate);
736
737                 /* Compute and save index expression values */
738                 exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
739                 exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
740                 numindexrows = 0;
741                 tcnt = 0;
742                 for (rowno = 0; rowno < numrows; rowno++)
743                 {
744                         HeapTuple       heapTuple = rows[rowno];
745
746                         /*
747                          * Reset the per-tuple context each time, to reclaim any cruft
748                          * left behind by evaluating the predicate or index expressions.
749                          */
750                         ResetExprContext(econtext);
751
752                         /* Set up for predicate or expression evaluation */
753                         ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
754
755                         /* If index is partial, check predicate */
756                         if (predicate != NIL)
757                         {
758                                 if (!ExecQual(predicate, econtext, false))
759                                         continue;
760                         }
761                         numindexrows++;
762
763                         if (attr_cnt > 0)
764                         {
765                                 /*
766                                  * Evaluate the index row to compute expression values. We
767                                  * could do this by hand, but FormIndexDatum is convenient.
768                                  */
769                                 FormIndexDatum(indexInfo,
770                                                            slot,
771                                                            estate,
772                                                            values,
773                                                            isnull);
774
775                                 /*
776                                  * Save just the columns we care about.  We copy the values
777                                  * into ind_context from the estate's per-tuple context.
778                                  */
779                                 for (i = 0; i < attr_cnt; i++)
780                                 {
781                                         VacAttrStats *stats = thisdata->vacattrstats[i];
782                                         int                     attnum = stats->attr->attnum;
783
784                                         if (isnull[attnum - 1])
785                                         {
786                                                 exprvals[tcnt] = (Datum) 0;
787                                                 exprnulls[tcnt] = true;
788                                         }
789                                         else
790                                         {
791                                                 exprvals[tcnt] = datumCopy(values[attnum - 1],
792                                                                                                    stats->attrtype->typbyval,
793                                                                                                    stats->attrtype->typlen);
794                                                 exprnulls[tcnt] = false;
795                                         }
796                                         tcnt++;
797                                 }
798                         }
799                 }
800
801                 /*
802                  * Having counted the number of rows that pass the predicate in the
803                  * sample, we can estimate the total number of rows in the index.
804                  */
805                 thisdata->tupleFract = (double) numindexrows / (double) numrows;
806                 totalindexrows = ceil(thisdata->tupleFract * totalrows);
807
808                 /*
809                  * Now we can compute the statistics for the expression columns.
810                  */
811                 if (numindexrows > 0)
812                 {
813                         MemoryContextSwitchTo(col_context);
814                         for (i = 0; i < attr_cnt; i++)
815                         {
816                                 VacAttrStats *stats = thisdata->vacattrstats[i];
817                                 AttributeOpts *aopt =
818                                 get_attribute_options(stats->attr->attrelid,
819                                                                           stats->attr->attnum);
820
821                                 stats->exprvals = exprvals + i;
822                                 stats->exprnulls = exprnulls + i;
823                                 stats->rowstride = attr_cnt;
824                                 (*stats->compute_stats) (stats,
825                                                                                  ind_fetch_func,
826                                                                                  numindexrows,
827                                                                                  totalindexrows);
828
829                                 /*
830                                  * If the n_distinct option is specified, it overrides the
831                                  * above computation.  For indices, we always use just
832                                  * n_distinct, not n_distinct_inherited.
833                                  */
834                                 if (aopt != NULL && aopt->n_distinct != 0.0)
835                                         stats->stadistinct = aopt->n_distinct;
836
837                                 MemoryContextResetAndDeleteChildren(col_context);
838                         }
839                 }
840
841                 /* And clean up */
842                 MemoryContextSwitchTo(ind_context);
843
844                 ExecDropSingleTupleTableSlot(slot);
845                 FreeExecutorState(estate);
846                 MemoryContextResetAndDeleteChildren(ind_context);
847         }
848
849         MemoryContextSwitchTo(old_context);
850         MemoryContextDelete(ind_context);
851 }
852
853 /*
854  * examine_attribute -- pre-analysis of a single column
855  *
856  * Determine whether the column is analyzable; if so, create and initialize
857  * a VacAttrStats struct for it.  If not, return NULL.
858  *
859  * If index_expr isn't NULL, then we're trying to analyze an expression index,
860  * and index_expr is the expression tree representing the column's data.
861  */
862 static VacAttrStats *
863 examine_attribute(Relation onerel, int attnum, Node *index_expr)
864 {
865         Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
866         HeapTuple       typtuple;
867         VacAttrStats *stats;
868         int                     i;
869         bool            ok;
870
871         /* Never analyze dropped columns */
872         if (attr->attisdropped)
873                 return NULL;
874
875         /* Don't analyze column if user has specified not to */
876         if (attr->attstattarget == 0)
877                 return NULL;
878
879         /*
880          * Create the VacAttrStats struct.  Note that we only have a copy of the
881          * fixed fields of the pg_attribute tuple.
882          */
883         stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
884         stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
885         memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
886
887         /*
888          * When analyzing an expression index, believe the expression tree's type
889          * not the column datatype --- the latter might be the opckeytype storage
890          * type of the opclass, which is not interesting for our purposes.  (Note:
891          * if we did anything with non-expression index columns, we'd need to
892          * figure out where to get the correct type info from, but for now that's
893          * not a problem.)      It's not clear whether anyone will care about the
894          * typmod, but we store that too just in case.
895          */
896         if (index_expr)
897         {
898                 stats->attrtypid = exprType(index_expr);
899                 stats->attrtypmod = exprTypmod(index_expr);
900         }
901         else
902         {
903                 stats->attrtypid = attr->atttypid;
904                 stats->attrtypmod = attr->atttypmod;
905         }
906
907         typtuple = SearchSysCacheCopy1(TYPEOID,
908                                                                    ObjectIdGetDatum(stats->attrtypid));
909         if (!HeapTupleIsValid(typtuple))
910                 elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
911         stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
912         stats->anl_context = anl_context;
913         stats->tupattnum = attnum;
914
915         /*
916          * The fields describing the stats->stavalues[n] element types default to
917          * the type of the data being analyzed, but the type-specific typanalyze
918          * function can change them if it wants to store something else.
919          */
920         for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
921         {
922                 stats->statypid[i] = stats->attrtypid;
923                 stats->statyplen[i] = stats->attrtype->typlen;
924                 stats->statypbyval[i] = stats->attrtype->typbyval;
925                 stats->statypalign[i] = stats->attrtype->typalign;
926         }
927
928         /*
929          * Call the type-specific typanalyze function.  If none is specified, use
930          * std_typanalyze().
931          */
932         if (OidIsValid(stats->attrtype->typanalyze))
933                 ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
934                                                                                    PointerGetDatum(stats)));
935         else
936                 ok = std_typanalyze(stats);
937
938         if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
939         {
940                 heap_freetuple(typtuple);
941                 pfree(stats->attr);
942                 pfree(stats);
943                 return NULL;
944         }
945
946         return stats;
947 }
948
949 /*
950  * BlockSampler_Init -- prepare for random sampling of blocknumbers
951  *
952  * BlockSampler is used for stage one of our new two-stage tuple
953  * sampling mechanism as discussed on pgsql-hackers 2004-04-02 (subject
954  * "Large DB").  It selects a random sample of samplesize blocks out of
955  * the nblocks blocks in the table.  If the table has less than
956  * samplesize blocks, all blocks are selected.
957  *
958  * Since we know the total number of blocks in advance, we can use the
959  * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
960  * algorithm.
961  */
962 static void
963 BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize)
964 {
965         bs->N = nblocks;                        /* measured table size */
966
967         /*
968          * If we decide to reduce samplesize for tables that have less or not much
969          * more than samplesize blocks, here is the place to do it.
970          */
971         bs->n = samplesize;
972         bs->t = 0;                                      /* blocks scanned so far */
973         bs->m = 0;                                      /* blocks selected so far */
974 }
975
976 static bool
977 BlockSampler_HasMore(BlockSampler bs)
978 {
979         return (bs->t < bs->N) && (bs->m < bs->n);
980 }
981
982 static BlockNumber
983 BlockSampler_Next(BlockSampler bs)
984 {
985         BlockNumber K = bs->N - bs->t;          /* remaining blocks */
986         int                     k = bs->n - bs->m;              /* blocks still to sample */
987         double          p;                              /* probability to skip block */
988         double          V;                              /* random */
989
990         Assert(BlockSampler_HasMore(bs));       /* hence K > 0 and k > 0 */
991
992         if ((BlockNumber) k >= K)
993         {
994                 /* need all the rest */
995                 bs->m++;
996                 return bs->t++;
997         }
998
999         /*----------
1000          * It is not obvious that this code matches Knuth's Algorithm S.
1001          * Knuth says to skip the current block with probability 1 - k/K.
1002          * If we are to skip, we should advance t (hence decrease K), and
1003          * repeat the same probabilistic test for the next block.  The naive
1004          * implementation thus requires an anl_random_fract() call for each block
1005          * number.  But we can reduce this to one anl_random_fract() call per
1006          * selected block, by noting that each time the while-test succeeds,
1007          * we can reinterpret V as a uniform random number in the range 0 to p.
1008          * Therefore, instead of choosing a new V, we just adjust p to be
1009          * the appropriate fraction of its former value, and our next loop
1010          * makes the appropriate probabilistic test.
1011          *
1012          * We have initially K > k > 0.  If the loop reduces K to equal k,
1013          * the next while-test must fail since p will become exactly zero
1014          * (we assume there will not be roundoff error in the division).
1015          * (Note: Knuth suggests a "<=" loop condition, but we use "<" just
1016          * to be doubly sure about roundoff error.)  Therefore K cannot become
1017          * less than k, which means that we cannot fail to select enough blocks.
1018          *----------
1019          */
1020         V = anl_random_fract();
1021         p = 1.0 - (double) k / (double) K;
1022         while (V < p)
1023         {
1024                 /* skip */
1025                 bs->t++;
1026                 K--;                                    /* keep K == N - t */
1027
1028                 /* adjust p to be new cutoff point in reduced range */
1029                 p *= 1.0 - (double) k / (double) K;
1030         }
1031
1032         /* select */
1033         bs->m++;
1034         return bs->t++;
1035 }
1036
1037 /*
1038  * acquire_sample_rows -- acquire a random sample of rows from the table
1039  *
1040  * Selected rows are returned in the caller-allocated array rows[], which
1041  * must have at least targrows entries.
1042  * The actual number of rows selected is returned as the function result.
1043  * We also estimate the total numbers of live and dead rows in the table,
1044  * and return them into *totalrows and *totaldeadrows, respectively.
1045  *
1046  * The returned list of tuples is in order by physical position in the table.
1047  * (We will rely on this later to derive correlation estimates.)
1048  *
1049  * As of May 2004 we use a new two-stage method:  Stage one selects up
1050  * to targrows random blocks (or all blocks, if there aren't so many).
1051  * Stage two scans these blocks and uses the Vitter algorithm to create
1052  * a random sample of targrows rows (or less, if there are less in the
1053  * sample of blocks).  The two stages are executed simultaneously: each
1054  * block is processed as soon as stage one returns its number and while
1055  * the rows are read stage two controls which ones are to be inserted
1056  * into the sample.
1057  *
1058  * Although every row has an equal chance of ending up in the final
1059  * sample, this sampling method is not perfect: not every possible
1060  * sample has an equal chance of being selected.  For large relations
1061  * the number of different blocks represented by the sample tends to be
1062  * too small.  We can live with that for now.  Improvements are welcome.
1063  *
1064  * An important property of this sampling method is that because we do
1065  * look at a statistically unbiased set of blocks, we should get
1066  * unbiased estimates of the average numbers of live and dead rows per
1067  * block.  The previous sampling method put too much credence in the row
1068  * density near the start of the table.
1069  */
1070 static int
1071 acquire_sample_rows(Relation onerel, int elevel,
1072                                         HeapTuple *rows, int targrows,
1073                                         double *totalrows, double *totaldeadrows)
1074 {
1075         int                     numrows = 0;    /* # rows now in reservoir */
1076         double          samplerows = 0; /* total # rows collected */
1077         double          liverows = 0;   /* # live rows seen */
1078         double          deadrows = 0;   /* # dead rows seen */
1079         double          rowstoskip = -1;        /* -1 means not set yet */
1080         BlockNumber totalblocks;
1081         TransactionId OldestXmin;
1082         BlockSamplerData bs;
1083         double          rstate;
1084
1085         Assert(targrows > 0);
1086
1087         totalblocks = RelationGetNumberOfBlocks(onerel);
1088
1089         /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
1090         OldestXmin = GetOldestXmin(onerel, true);
1091
1092         /* Prepare for sampling block numbers */
1093         BlockSampler_Init(&bs, totalblocks, targrows);
1094         /* Prepare for sampling rows */
1095         rstate = anl_init_selection_state(targrows);
1096
1097         /* Outer loop over blocks to sample */
1098         while (BlockSampler_HasMore(&bs))
1099         {
1100                 BlockNumber targblock = BlockSampler_Next(&bs);
1101                 Buffer          targbuffer;
1102                 Page            targpage;
1103                 OffsetNumber targoffset,
1104                                         maxoffset;
1105
1106                 vacuum_delay_point();
1107
1108                 /*
1109                  * We must maintain a pin on the target page's buffer to ensure that
1110                  * the maxoffset value stays good (else concurrent VACUUM might delete
1111                  * tuples out from under us).  Hence, pin the page until we are done
1112                  * looking at it.  We also choose to hold sharelock on the buffer
1113                  * throughout --- we could release and re-acquire sharelock for each
1114                  * tuple, but since we aren't doing much work per tuple, the extra
1115                  * lock traffic is probably better avoided.
1116                  */
1117                 targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
1118                                                                                 RBM_NORMAL, vac_strategy);
1119                 LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
1120                 targpage = BufferGetPage(targbuffer);
1121                 maxoffset = PageGetMaxOffsetNumber(targpage);
1122
1123                 /* Inner loop over all tuples on the selected page */
1124                 for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
1125                 {
1126                         ItemId          itemid;
1127                         HeapTupleData targtuple;
1128                         bool            sample_it = false;
1129
1130                         itemid = PageGetItemId(targpage, targoffset);
1131
1132                         /*
1133                          * We ignore unused and redirect line pointers.  DEAD line
1134                          * pointers should be counted as dead, because we need vacuum to
1135                          * run to get rid of them.  Note that this rule agrees with the
1136                          * way that heap_page_prune() counts things.
1137                          */
1138                         if (!ItemIdIsNormal(itemid))
1139                         {
1140                                 if (ItemIdIsDead(itemid))
1141                                         deadrows += 1;
1142                                 continue;
1143                         }
1144
1145                         ItemPointerSet(&targtuple.t_self, targblock, targoffset);
1146
1147                         targtuple.t_tableOid = RelationGetRelid(onerel);
1148                         targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1149                         targtuple.t_len = ItemIdGetLength(itemid);
1150
1151                         switch (HeapTupleSatisfiesVacuum(&targtuple,
1152                                                                                          OldestXmin,
1153                                                                                          targbuffer))
1154                         {
1155                                 case HEAPTUPLE_LIVE:
1156                                         sample_it = true;
1157                                         liverows += 1;
1158                                         break;
1159
1160                                 case HEAPTUPLE_DEAD:
1161                                 case HEAPTUPLE_RECENTLY_DEAD:
1162                                         /* Count dead and recently-dead rows */
1163                                         deadrows += 1;
1164                                         break;
1165
1166                                 case HEAPTUPLE_INSERT_IN_PROGRESS:
1167
1168                                         /*
1169                                          * Insert-in-progress rows are not counted.  We assume
1170                                          * that when the inserting transaction commits or aborts,
1171                                          * it will send a stats message to increment the proper
1172                                          * count.  This works right only if that transaction ends
1173                                          * after we finish analyzing the table; if things happen
1174                                          * in the other order, its stats update will be
1175                                          * overwritten by ours.  However, the error will be large
1176                                          * only if the other transaction runs long enough to
1177                                          * insert many tuples, so assuming it will finish after us
1178                                          * is the safer option.
1179                                          *
1180                                          * A special case is that the inserting transaction might
1181                                          * be our own.  In this case we should count and sample
1182                                          * the row, to accommodate users who load a table and
1183                                          * analyze it in one transaction.  (pgstat_report_analyze
1184                                          * has to adjust the numbers we send to the stats
1185                                          * collector to make this come out right.)
1186                                          */
1187                                         if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
1188                                         {
1189                                                 sample_it = true;
1190                                                 liverows += 1;
1191                                         }
1192                                         break;
1193
1194                                 case HEAPTUPLE_DELETE_IN_PROGRESS:
1195
1196                                         /*
1197                                          * We count delete-in-progress rows as still live, using
1198                                          * the same reasoning given above; but we don't bother to
1199                                          * include them in the sample.
1200                                          *
1201                                          * If the delete was done by our own transaction, however,
1202                                          * we must count the row as dead to make
1203                                          * pgstat_report_analyze's stats adjustments come out
1204                                          * right.  (Note: this works out properly when the row was
1205                                          * both inserted and deleted in our xact.)
1206                                          */
1207                                         if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
1208                                                 deadrows += 1;
1209                                         else
1210                                                 liverows += 1;
1211                                         break;
1212
1213                                 default:
1214                                         elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1215                                         break;
1216                         }
1217
1218                         if (sample_it)
1219                         {
1220                                 /*
1221                                  * The first targrows sample rows are simply copied into the
1222                                  * reservoir. Then we start replacing tuples in the sample
1223                                  * until we reach the end of the relation.  This algorithm is
1224                                  * from Jeff Vitter's paper (see full citation below). It
1225                                  * works by repeatedly computing the number of tuples to skip
1226                                  * before selecting a tuple, which replaces a randomly chosen
1227                                  * element of the reservoir (current set of tuples).  At all
1228                                  * times the reservoir is a true random sample of the tuples
1229                                  * we've passed over so far, so when we fall off the end of
1230                                  * the relation we're done.
1231                                  */
1232                                 if (numrows < targrows)
1233                                         rows[numrows++] = heap_copytuple(&targtuple);
1234                                 else
1235                                 {
1236                                         /*
1237                                          * t in Vitter's paper is the number of records already
1238                                          * processed.  If we need to compute a new S value, we
1239                                          * must use the not-yet-incremented value of samplerows as
1240                                          * t.
1241                                          */
1242                                         if (rowstoskip < 0)
1243                                                 rowstoskip = anl_get_next_S(samplerows, targrows,
1244                                                                                                         &rstate);
1245
1246                                         if (rowstoskip <= 0)
1247                                         {
1248                                                 /*
1249                                                  * Found a suitable tuple, so save it, replacing one
1250                                                  * old tuple at random
1251                                                  */
1252                                                 int                     k = (int) (targrows * anl_random_fract());
1253
1254                                                 Assert(k >= 0 && k < targrows);
1255                                                 heap_freetuple(rows[k]);
1256                                                 rows[k] = heap_copytuple(&targtuple);
1257                                         }
1258
1259                                         rowstoskip -= 1;
1260                                 }
1261
1262                                 samplerows += 1;
1263                         }
1264                 }
1265
1266                 /* Now release the lock and pin on the page */
1267                 UnlockReleaseBuffer(targbuffer);
1268         }
1269
1270         /*
1271          * If we didn't find as many tuples as we wanted then we're done. No sort
1272          * is needed, since they're already in order.
1273          *
1274          * Otherwise we need to sort the collected tuples by position
1275          * (itempointer). It's not worth worrying about corner cases where the
1276          * tuples are already sorted.
1277          */
1278         if (numrows == targrows)
1279                 qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
1280
1281         /*
1282          * Estimate total numbers of rows in relation.  For live rows, use
1283          * vac_estimate_reltuples; for dead rows, we have no source of old
1284          * information, so we have to assume the density is the same in unseen
1285          * pages as in the pages we scanned.
1286          */
1287         *totalrows = vac_estimate_reltuples(onerel, true,
1288                                                                                 totalblocks,
1289                                                                                 bs.m,
1290                                                                                 liverows);
1291         if (bs.m > 0)
1292                 *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
1293         else
1294                 *totaldeadrows = 0.0;
1295
1296         /*
1297          * Emit some interesting relation info
1298          */
1299         ereport(elevel,
1300                         (errmsg("\"%s\": scanned %d of %u pages, "
1301                                         "containing %.0f live rows and %.0f dead rows; "
1302                                         "%d rows in sample, %.0f estimated total rows",
1303                                         RelationGetRelationName(onerel),
1304                                         bs.m, totalblocks,
1305                                         liverows, deadrows,
1306                                         numrows, *totalrows)));
1307
1308         return numrows;
1309 }
1310
1311 /* Select a random value R uniformly distributed in (0 - 1) */
1312 double
1313 anl_random_fract(void)
1314 {
1315         return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
1316 }
1317
/*
 * Reservoir-sampling support: anl_init_selection_state and anl_get_next_S
 * together implement Algorithm Z from "Random sampling with a reservoir"
 * by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1 (Mar. 1985),
 * Pages 37-57.
 *
 * Vitter phrases the algorithm in terms of S, the count of records to skip
 * before the next record is processed.  S is derived mainly from t, the
 * number of records read so far; the only additional state that has to
 * survive between calls is W, a random state variable.
 *
 * anl_init_selection_state produces the starting value of W for a
 * reservoir of n records.
 *
 * anl_get_next_S, given that t records have been read already (t >= n),
 * yields the number of records to skip before the next one is processed.
 */
double
anl_init_selection_state(int n)
{
	double		u = anl_random_fract();

	/* Starting W, consumed the first time Algorithm Z is applied */
	return exp(-log(u) / n);
}
1338
/*
 * anl_get_next_S -- number of records to skip before the next one to keep
 *
 * t is the number of records already read, n the reservoir size, and
 * *stateptr holds Vitter's random state W.  *stateptr is read and rewritten
 * only on the Algorithm Z path; the Algorithm X path leaves it untouched.
 */
double
anl_get_next_S(double t, int n, double *stateptr)
{
	double		skip;
	double		W;
	double		term;

	/* 22 is the constant T from Vitter's paper */
	if (t <= (22.0 * n))
	{
		/* t is still small: process records using Algorithm X */
		double		V = anl_random_fract();		/* Generate V */
		double		quot;

		skip = 0;
		t += 1;
		/* Note: "num" in Vitter's code is always equal to t - n */
		quot = (t - (double) n) / t;

		/* Advance until we reach the min S satisfying (4.1) */
		while (quot > V)
		{
			skip += 1;
			t += 1;
			quot *= (t - (double) n) / t;
		}

		return skip;
	}

	/* Otherwise apply Algorithm Z */
	W = *stateptr;
	term = t - (double) n + 1;

	for (;;)
	{
		double		U,
					X,
					ratio,
					lhs,
					rhs,
					y,
					numer,
					numer_lim,
					denom;

		/* Generate U and X */
		U = anl_random_fract();
		X = t * (W - 1.0);

		/* The candidate skip count is floor(X) */
		skip = floor(X);

		/* Quick acceptance: test U <= h(S)/cg(X) in the manner of (6.3) */
		ratio = (t + 1) / term;
		lhs = exp(log(((U * ratio * ratio) * (term + skip)) / (t + X)) / n);
		rhs = (((t + X) / (term + skip)) * term) / t;
		if (lhs <= rhs)
		{
			W = rhs / lhs;
			break;
		}

		/* Slower acceptance: test U <= f(S)/cg(X) */
		y = (((U * (t + 1)) / term) * (t + skip + 1)) / (t + X);
		if ((double) n < skip)
		{
			denom = t;
			numer_lim = term + skip;
		}
		else
		{
			denom = t - (double) n + skip;
			numer_lim = t + 1;
		}
		for (numer = t + skip; numer >= numer_lim; numer -= 1)
		{
			y *= numer / denom;
			denom -= 1;
		}

		/* Generate W in advance, before deciding whether to accept */
		W = exp(-log(anl_random_fract()) / n);
		if (exp(log(y) / n) <= (t + X) / t)
			break;
	}

	*stateptr = W;
	return skip;
}
1420
1421 /*
1422  * qsort comparator for sorting rows[] array
1423  */
1424 static int
1425 compare_rows(const void *a, const void *b)
1426 {
1427         HeapTuple       ha = *(const HeapTuple *) a;
1428         HeapTuple       hb = *(const HeapTuple *) b;
1429         BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
1430         OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
1431         BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
1432         OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
1433
1434         if (ba < bb)
1435                 return -1;
1436         if (ba > bb)
1437                 return 1;
1438         if (oa < ob)
1439                 return -1;
1440         if (oa > ob)
1441                 return 1;
1442         return 0;
1443 }
1444
1445
1446 /*
1447  * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
1448  *
1449  * This has the same API as acquire_sample_rows, except that rows are
1450  * collected from all inheritance children as well as the specified table.
1451  * We fail and return zero if there are no inheritance children.
1452  */
1453 static int
1454 acquire_inherited_sample_rows(Relation onerel, int elevel,
1455                                                           HeapTuple *rows, int targrows,
1456                                                           double *totalrows, double *totaldeadrows)
1457 {
1458         List       *tableOIDs;
1459         Relation   *rels;
1460         double     *relblocks;
1461         double          totalblocks;
1462         int                     numrows,
1463                                 nrels,
1464                                 i;
1465         ListCell   *lc;
1466
1467         /*
1468          * Find all members of inheritance set.  We only need AccessShareLock on
1469          * the children.
1470          */
1471         tableOIDs =
1472                 find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);
1473
1474         /*
1475          * Check that there's at least one descendant, else fail.  This could
1476          * happen despite analyze_rel's relhassubclass check, if table once had a
1477          * child but no longer does.  In that case, we can clear the
1478          * relhassubclass field so as not to make the same mistake again later.
1479          * (This is safe because we hold ShareUpdateExclusiveLock.)
1480          */
1481         if (list_length(tableOIDs) < 2)
1482         {
1483                 /* CCI because we already updated the pg_class row in this command */
1484                 CommandCounterIncrement();
1485                 SetRelationHasSubclass(RelationGetRelid(onerel), false);
1486                 ereport(elevel,
1487                                 (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
1488                                                 get_namespace_name(RelationGetNamespace(onerel)),
1489                                                 RelationGetRelationName(onerel))));
1490                 return 0;
1491         }
1492
1493         /*
1494          * Count the blocks in all the relations.  The result could overflow
1495          * BlockNumber, so we use double arithmetic.
1496          */
1497         rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
1498         relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
1499         totalblocks = 0;
1500         nrels = 0;
1501         foreach(lc, tableOIDs)
1502         {
1503                 Oid                     childOID = lfirst_oid(lc);
1504                 Relation        childrel;
1505
1506                 /* We already got the needed lock */
1507                 childrel = heap_open(childOID, NoLock);
1508
1509                 /* Ignore if temp table of another backend */
1510                 if (RELATION_IS_OTHER_TEMP(childrel))
1511                 {
1512                         /* ... but release the lock on it */
1513                         Assert(childrel != onerel);
1514                         heap_close(childrel, AccessShareLock);
1515                         continue;
1516                 }
1517
1518                 rels[nrels] = childrel;
1519                 relblocks[nrels] = (double) RelationGetNumberOfBlocks(childrel);
1520                 totalblocks += relblocks[nrels];
1521                 nrels++;
1522         }
1523
1524         /*
1525          * Now sample rows from each relation, proportionally to its fraction of
1526          * the total block count.  (This might be less than desirable if the child
1527          * rels have radically different free-space percentages, but it's not
1528          * clear that it's worth working harder.)
1529          */
1530         numrows = 0;
1531         *totalrows = 0;
1532         *totaldeadrows = 0;
1533         for (i = 0; i < nrels; i++)
1534         {
1535                 Relation        childrel = rels[i];
1536                 double          childblocks = relblocks[i];
1537
1538                 if (childblocks > 0)
1539                 {
1540                         int                     childtargrows;
1541
1542                         childtargrows = (int) rint(targrows * childblocks / totalblocks);
1543                         /* Make sure we don't overrun due to roundoff error */
1544                         childtargrows = Min(childtargrows, targrows - numrows);
1545                         if (childtargrows > 0)
1546                         {
1547                                 int                     childrows;
1548                                 double          trows,
1549                                                         tdrows;
1550
1551                                 /* Fetch a random sample of the child's rows */
1552                                 childrows = acquire_sample_rows(childrel,
1553                                                                                                 elevel,
1554                                                                                                 rows + numrows,
1555                                                                                                 childtargrows,
1556                                                                                                 &trows,
1557                                                                                                 &tdrows);
1558
1559                                 /* We may need to convert from child's rowtype to parent's */
1560                                 if (childrows > 0 &&
1561                                         !equalTupleDescs(RelationGetDescr(childrel),
1562                                                                          RelationGetDescr(onerel)))
1563                                 {
1564                                         TupleConversionMap *map;
1565
1566                                         map = convert_tuples_by_name(RelationGetDescr(childrel),
1567                                                                                                  RelationGetDescr(onerel),
1568                                                                  gettext_noop("could not convert row type"));
1569                                         if (map != NULL)
1570                                         {
1571                                                 int                     j;
1572
1573                                                 for (j = 0; j < childrows; j++)
1574                                                 {
1575                                                         HeapTuple       newtup;
1576
1577                                                         newtup = do_convert_tuple(rows[numrows + j], map);
1578                                                         heap_freetuple(rows[numrows + j]);
1579                                                         rows[numrows + j] = newtup;
1580                                                 }
1581                                                 free_conversion_map(map);
1582                                         }
1583                                 }
1584
1585                                 /* And add to counts */
1586                                 numrows += childrows;
1587                                 *totalrows += trows;
1588                                 *totaldeadrows += tdrows;
1589                         }
1590                 }
1591
1592                 /*
1593                  * Note: we cannot release the child-table locks, since we may have
1594                  * pointers to their TOAST tables in the sampled rows.
1595                  */
1596                 heap_close(childrel, NoLock);
1597         }
1598
1599         return numrows;
1600 }
1601
1602
1603 /*
1604  *      update_attstats() -- update attribute statistics for one relation
1605  *
1606  *              Statistics are stored in several places: the pg_class row for the
1607  *              relation has stats about the whole relation, and there is a
1608  *              pg_statistic row for each (non-system) attribute that has ever
1609  *              been analyzed.  The pg_class values are updated by VACUUM, not here.
1610  *
1611  *              pg_statistic rows are just added or updated normally.  This means
1612  *              that pg_statistic will probably contain some deleted rows at the
1613  *              completion of a vacuum cycle, unless it happens to get vacuumed last.
1614  *
1615  *              To keep things simple, we punt for pg_statistic, and don't try
1616  *              to compute or store rows for pg_statistic itself in pg_statistic.
1617  *              This could possibly be made to work, but it's not worth the trouble.
1618  *              Note analyze_rel() has seen to it that we won't come here when
1619  *              vacuuming pg_statistic itself.
1620  *
1621  *              Note: there would be a race condition here if two backends could
1622  *              ANALYZE the same table concurrently.  Presently, we lock that out
1623  *              by taking a self-exclusive lock on the relation in analyze_rel().
1624  */
1625 static void
1626 update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
1627 {
1628         Relation        sd;
1629         int                     attno;
1630
1631         if (natts <= 0)
1632                 return;                                 /* nothing to do */
1633
1634         sd = heap_open(StatisticRelationId, RowExclusiveLock);
1635
1636         for (attno = 0; attno < natts; attno++)
1637         {
1638                 VacAttrStats *stats = vacattrstats[attno];
1639                 HeapTuple       stup,
1640                                         oldtup;
1641                 int                     i,
1642                                         k,
1643                                         n;
1644                 Datum           values[Natts_pg_statistic];
1645                 bool            nulls[Natts_pg_statistic];
1646                 bool            replaces[Natts_pg_statistic];
1647
1648                 /* Ignore attr if we weren't able to collect stats */
1649                 if (!stats->stats_valid)
1650                         continue;
1651
1652                 /*
1653                  * Construct a new pg_statistic tuple
1654                  */
1655                 for (i = 0; i < Natts_pg_statistic; ++i)
1656                 {
1657                         nulls[i] = false;
1658                         replaces[i] = true;
1659                 }
1660
1661                 values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
1662                 values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
1663                 values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
1664                 values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
1665                 values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
1666                 values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
1667                 i = Anum_pg_statistic_stakind1 - 1;
1668                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
1669                 {
1670                         values[i++] = Int16GetDatum(stats->stakind[k]);         /* stakindN */
1671                 }
1672                 i = Anum_pg_statistic_staop1 - 1;
1673                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
1674                 {
1675                         values[i++] = ObjectIdGetDatum(stats->staop[k]);        /* staopN */
1676                 }
1677                 i = Anum_pg_statistic_stanumbers1 - 1;
1678                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
1679                 {
1680                         int                     nnum = stats->numnumbers[k];
1681
1682                         if (nnum > 0)
1683                         {
1684                                 Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
1685                                 ArrayType  *arry;
1686
1687                                 for (n = 0; n < nnum; n++)
1688                                         numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
1689                                 /* XXX knows more than it should about type float4: */
1690                                 arry = construct_array(numdatums, nnum,
1691                                                                            FLOAT4OID,
1692                                                                            sizeof(float4), FLOAT4PASSBYVAL, 'i');
1693                                 values[i++] = PointerGetDatum(arry);    /* stanumbersN */
1694                         }
1695                         else
1696                         {
1697                                 nulls[i] = true;
1698                                 values[i++] = (Datum) 0;
1699                         }
1700                 }
1701                 i = Anum_pg_statistic_stavalues1 - 1;
1702                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
1703                 {
1704                         if (stats->numvalues[k] > 0)
1705                         {
1706                                 ArrayType  *arry;
1707
1708                                 arry = construct_array(stats->stavalues[k],
1709                                                                            stats->numvalues[k],
1710                                                                            stats->statypid[k],
1711                                                                            stats->statyplen[k],
1712                                                                            stats->statypbyval[k],
1713                                                                            stats->statypalign[k]);
1714                                 values[i++] = PointerGetDatum(arry);    /* stavaluesN */
1715                         }
1716                         else
1717                         {
1718                                 nulls[i] = true;
1719                                 values[i++] = (Datum) 0;
1720                         }
1721                 }
1722
1723                 /* Is there already a pg_statistic tuple for this attribute? */
1724                 oldtup = SearchSysCache3(STATRELATTINH,
1725                                                                  ObjectIdGetDatum(relid),
1726                                                                  Int16GetDatum(stats->attr->attnum),
1727                                                                  BoolGetDatum(inh));
1728
1729                 if (HeapTupleIsValid(oldtup))
1730                 {
1731                         /* Yes, replace it */
1732                         stup = heap_modify_tuple(oldtup,
1733                                                                          RelationGetDescr(sd),
1734                                                                          values,
1735                                                                          nulls,
1736                                                                          replaces);
1737                         ReleaseSysCache(oldtup);
1738                         simple_heap_update(sd, &stup->t_self, stup);
1739                 }
1740                 else
1741                 {
1742                         /* No, insert new tuple */
1743                         stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
1744                         simple_heap_insert(sd, stup);
1745                 }
1746
1747                 /* update indexes too */
1748                 CatalogUpdateIndexes(sd, stup);
1749
1750                 heap_freetuple(stup);
1751         }
1752
1753         heap_close(sd, RowExclusiveLock);
1754 }
1755
1756 /*
1757  * Standard fetch function for use by compute_stats subroutines.
1758  *
1759  * This exists to provide some insulation between compute_stats routines
1760  * and the actual storage of the sample data.
1761  */
1762 static Datum
1763 std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
1764 {
1765         int                     attnum = stats->tupattnum;
1766         HeapTuple       tuple = stats->rows[rownum];
1767         TupleDesc       tupDesc = stats->tupDesc;
1768
1769         return heap_getattr(tuple, attnum, tupDesc, isNull);
1770 }
1771
1772 /*
1773  * Fetch function for analyzing index expressions.
1774  *
1775  * We have not bothered to construct index tuples, instead the data is
1776  * just in Datum arrays.
1777  */
1778 static Datum
1779 ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
1780 {
1781         int                     i;
1782
1783         /* exprvals and exprnulls are already offset for proper column */
1784         i = rownum * stats->rowstride;
1785         *isNull = stats->exprnulls[i];
1786         return stats->exprvals[i];
1787 }
1788
1789
1790 /*==========================================================================
1791  *
1792  * Code below this point represents the "standard" type-specific statistics
1793  * analysis algorithms.  This code can be replaced on a per-data-type basis
1794  * by setting a nonzero value in pg_type.typanalyze.
1795  *
1796  *==========================================================================
1797  */
1798
1799
/*
 * To avoid consuming too much memory during analysis and/or too much space
 * in the resulting pg_statistic rows, we ignore varlena datums that are wider
 * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
 * and distinct-value calculations since a wide value is unlikely to be
 * duplicated at all, much less be a most-common value.  For the same reason,
 * ignoring wide values will not affect our estimates of histogram bin
 * boundaries very much.
 */
#define WIDTH_THRESHOLD  1024

/*
 * Exchange the contents of two lvalues of type int (swapInt) or Datum
 * (swapDatum).  The arguments are parenthesized inside the expansion so
 * that operator precedence in the caller's expressions cannot change the
 * meaning.  Each argument is evaluated twice, so it must be free of side
 * effects.
 */
#define swapInt(a,b)	do {int _tmp; _tmp=(a); (a)=(b); (b)=_tmp;} while(0)
#define swapDatum(a,b)	do {Datum _tmp; _tmp=(a); (a)=(b); (b)=_tmp;} while(0)
1813
1814 /*
1815  * Extra information used by the default analysis routines
1816  */
1817 typedef struct
1818 {
1819         Oid                     eqopr;                  /* '=' operator for datatype, if any */
1820         Oid                     eqfunc;                 /* and associated function */
1821         Oid                     ltopr;                  /* '<' operator for datatype, if any */
1822 } StdAnalyzeData;
1823
1824 typedef struct
1825 {
1826         Datum           value;                  /* a data value */
1827         int                     tupno;                  /* position index for tuple it came from */
1828 } ScalarItem;
1829
/* Bookkeeping for one duplicated value */
typedef struct ScalarMCVItem
{
	/* # of duplicates */
	int			count;
	/* values[] index of first occurrence */
	int			first;
} ScalarMCVItem;
1835
1836 typedef struct
1837 {
1838         SortSupport ssup;
1839         int                *tupnoLink;
1840 } CompareScalarsContext;
1841
1842
1843 static void compute_minimal_stats(VacAttrStatsP stats,
1844                                           AnalyzeAttrFetchFunc fetchfunc,
1845                                           int samplerows,
1846                                           double totalrows);
1847 static void compute_scalar_stats(VacAttrStatsP stats,
1848                                          AnalyzeAttrFetchFunc fetchfunc,
1849                                          int samplerows,
1850                                          double totalrows);
1851 static int      compare_scalars(const void *a, const void *b, void *arg);
1852 static int      compare_mcvs(const void *a, const void *b);
1853
1854
1855 /*
1856  * std_typanalyze -- the default type-specific typanalyze function
1857  */
1858 bool
1859 std_typanalyze(VacAttrStats *stats)
1860 {
1861         Form_pg_attribute attr = stats->attr;
1862         Oid                     ltopr;
1863         Oid                     eqopr;
1864         StdAnalyzeData *mystats;
1865
1866         /* If the attstattarget column is negative, use the default value */
1867         /* NB: it is okay to scribble on stats->attr since it's a copy */
1868         if (attr->attstattarget < 0)
1869                 attr->attstattarget = default_statistics_target;
1870
1871         /* Look for default "<" and "=" operators for column's type */
1872         get_sort_group_operators(stats->attrtypid,
1873                                                          false, false, false,
1874                                                          &ltopr, &eqopr, NULL,
1875                                                          NULL);
1876
1877         /* If column has no "=" operator, we can't do much of anything */
1878         if (!OidIsValid(eqopr))
1879                 return false;
1880
1881         /* Save the operator info for compute_stats routines */
1882         mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
1883         mystats->eqopr = eqopr;
1884         mystats->eqfunc = get_opcode(eqopr);
1885         mystats->ltopr = ltopr;
1886         stats->extra_data = mystats;
1887
1888         /*
1889          * Determine which standard statistics algorithm to use
1890          */
1891         if (OidIsValid(ltopr))
1892         {
1893                 /* Seems to be a scalar datatype */
1894                 stats->compute_stats = compute_scalar_stats;
1895                 /*--------------------
1896                  * The following choice of minrows is based on the paper
1897                  * "Random sampling for histogram construction: how much is enough?"
1898                  * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
1899                  * Proceedings of ACM SIGMOD International Conference on Management
1900                  * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
1901                  * says that for table size n, histogram size k, maximum relative
1902                  * error in bin size f, and error probability gamma, the minimum
1903                  * random sample size is
1904                  *              r = 4 * k * ln(2*n/gamma) / f^2
1905                  * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
1906                  *              r = 305.82 * k
1907                  * Note that because of the log function, the dependence on n is
1908                  * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
1909                  * bin size error with probability 0.99.  So there's no real need to
1910                  * scale for n, which is a good thing because we don't necessarily
1911                  * know it at this point.
1912                  *--------------------
1913                  */
1914                 stats->minrows = 300 * attr->attstattarget;
1915         }
1916         else
1917         {
1918                 /* Can't do much but the minimal stuff */
1919                 stats->compute_stats = compute_minimal_stats;
1920                 /* Might as well use the same minrows as above */
1921                 stats->minrows = 300 * attr->attstattarget;
1922         }
1923
1924         return true;
1925 }
1926
1927 /*
1928  *      compute_minimal_stats() -- compute minimal column statistics
1929  *
1930  *      We use this when we can find only an "=" operator for the datatype.
1931  *
1932  *      We determine the fraction of non-null rows, the average width, the
1933  *      most common values, and the (estimated) number of distinct values.
1934  *
1935  *      The most common values are determined by brute force: we keep a list
1936  *      of previously seen values, ordered by number of times seen, as we scan
1937  *      the samples.  A newly seen value is inserted just after the last
1938  *      multiply-seen value, causing the bottommost (oldest) singly-seen value
1939  *      to drop off the list.  The accuracy of this method, and also its cost,
1940  *      depend mainly on the length of the list we are willing to keep.
1941  */
1942 static void
1943 compute_minimal_stats(VacAttrStatsP stats,
1944                                           AnalyzeAttrFetchFunc fetchfunc,
1945                                           int samplerows,
1946                                           double totalrows)
1947 {
1948         int                     i;
1949         int                     null_cnt = 0;
1950         int                     nonnull_cnt = 0;
1951         int                     toowide_cnt = 0;
1952         double          total_width = 0;
1953         bool            is_varlena = (!stats->attrtype->typbyval &&
1954                                                           stats->attrtype->typlen == -1);
1955         bool            is_varwidth = (!stats->attrtype->typbyval &&
1956                                                            stats->attrtype->typlen < 0);
1957         FmgrInfo        f_cmpeq;
1958         typedef struct
1959         {
1960                 Datum           value;
1961                 int                     count;
1962         } TrackItem;
1963         TrackItem  *track;
1964         int                     track_cnt,
1965                                 track_max;
1966         int                     num_mcv = stats->attr->attstattarget;
1967         StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
1968
1969         /*
1970          * We track up to 2*n values for an n-element MCV list; but at least 10
1971          */
1972         track_max = 2 * num_mcv;
1973         if (track_max < 10)
1974                 track_max = 10;
1975         track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
1976         track_cnt = 0;
1977
1978         fmgr_info(mystats->eqfunc, &f_cmpeq);
1979
1980         for (i = 0; i < samplerows; i++)
1981         {
1982                 Datum           value;
1983                 bool            isnull;
1984                 bool            match;
1985                 int                     firstcount1,
1986                                         j;
1987
1988                 vacuum_delay_point();
1989
1990                 value = fetchfunc(stats, i, &isnull);
1991
1992                 /* Check for null/nonnull */
1993                 if (isnull)
1994                 {
1995                         null_cnt++;
1996                         continue;
1997                 }
1998                 nonnull_cnt++;
1999
2000                 /*
2001                  * If it's a variable-width field, add up widths for average width
2002                  * calculation.  Note that if the value is toasted, we use the toasted
2003                  * width.  We don't bother with this calculation if it's a fixed-width
2004                  * type.
2005                  */
2006                 if (is_varlena)
2007                 {
2008                         total_width += VARSIZE_ANY(DatumGetPointer(value));
2009
2010                         /*
2011                          * If the value is toasted, we want to detoast it just once to
2012                          * avoid repeated detoastings and resultant excess memory usage
2013                          * during the comparisons.  Also, check to see if the value is
2014                          * excessively wide, and if so don't detoast at all --- just
2015                          * ignore the value.
2016                          */
2017                         if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
2018                         {
2019                                 toowide_cnt++;
2020                                 continue;
2021                         }
2022                         value = PointerGetDatum(PG_DETOAST_DATUM(value));
2023                 }
2024                 else if (is_varwidth)
2025                 {
2026                         /* must be cstring */
2027                         total_width += strlen(DatumGetCString(value)) + 1;
2028                 }
2029
2030                 /*
2031                  * See if the value matches anything we're already tracking.
2032                  */
2033                 match = false;
2034                 firstcount1 = track_cnt;
2035                 for (j = 0; j < track_cnt; j++)
2036                 {
2037                         /* We always use the default collation for statistics */
2038                         if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
2039                                                                                            DEFAULT_COLLATION_OID,
2040                                                                                            value, track[j].value)))
2041                         {
2042                                 match = true;
2043                                 break;
2044                         }
2045                         if (j < firstcount1 && track[j].count == 1)
2046                                 firstcount1 = j;
2047                 }
2048
2049                 if (match)
2050                 {
2051                         /* Found a match */
2052                         track[j].count++;
2053                         /* This value may now need to "bubble up" in the track list */
2054                         while (j > 0 && track[j].count > track[j - 1].count)
2055                         {
2056                                 swapDatum(track[j].value, track[j - 1].value);
2057                                 swapInt(track[j].count, track[j - 1].count);
2058                                 j--;
2059                         }
2060                 }
2061                 else
2062                 {
2063                         /* No match.  Insert at head of count-1 list */
2064                         if (track_cnt < track_max)
2065                                 track_cnt++;
2066                         for (j = track_cnt - 1; j > firstcount1; j--)
2067                         {
2068                                 track[j].value = track[j - 1].value;
2069                                 track[j].count = track[j - 1].count;
2070                         }
2071                         if (firstcount1 < track_cnt)
2072                         {
2073                                 track[firstcount1].value = value;
2074                                 track[firstcount1].count = 1;
2075                         }
2076                 }
2077         }
2078
2079         /* We can only compute real stats if we found some non-null values. */
2080         if (nonnull_cnt > 0)
2081         {
2082                 int                     nmultiple,
2083                                         summultiple;
2084
2085                 stats->stats_valid = true;
2086                 /* Do the simple null-frac and width stats */
2087                 stats->stanullfrac = (double) null_cnt / (double) samplerows;
2088                 if (is_varwidth)
2089                         stats->stawidth = total_width / (double) nonnull_cnt;
2090                 else
2091                         stats->stawidth = stats->attrtype->typlen;
2092
2093                 /* Count the number of values we found multiple times */
2094                 summultiple = 0;
2095                 for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
2096                 {
2097                         if (track[nmultiple].count == 1)
2098                                 break;
2099                         summultiple += track[nmultiple].count;
2100                 }
2101
2102                 if (nmultiple == 0)
2103                 {
2104                         /* If we found no repeated values, assume it's a unique column */
2105                         stats->stadistinct = -1.0;
2106                 }
2107                 else if (track_cnt < track_max && toowide_cnt == 0 &&
2108                                  nmultiple == track_cnt)
2109                 {
2110                         /*
2111                          * Our track list includes every value in the sample, and every
2112                          * value appeared more than once.  Assume the column has just
2113                          * these values.
2114                          */
2115                         stats->stadistinct = track_cnt;
2116                 }
2117                 else
2118                 {
2119                         /*----------
2120                          * Estimate the number of distinct values using the estimator
2121                          * proposed by Haas and Stokes in IBM Research Report RJ 10025:
2122                          *              n*d / (n - f1 + f1*n/N)
2123                          * where f1 is the number of distinct values that occurred
2124                          * exactly once in our sample of n rows (from a total of N),
2125                          * and d is the total number of distinct values in the sample.
2126                          * This is their Duj1 estimator; the other estimators they
2127                          * recommend are considerably more complex, and are numerically
2128                          * very unstable when n is much smaller than N.
2129                          *
2130                          * We assume (not very reliably!) that all the multiply-occurring
2131                          * values are reflected in the final track[] list, and the other
2132                          * nonnull values all appeared but once.  (XXX this usually
2133                          * results in a drastic overestimate of ndistinct.  Can we do
2134                          * any better?)
2135                          *----------
2136                          */
2137                         int                     f1 = nonnull_cnt - summultiple;
2138                         int                     d = f1 + nmultiple;
2139                         double          numer,
2140                                                 denom,
2141                                                 stadistinct;
2142
2143                         numer = (double) samplerows *(double) d;
2144
2145                         denom = (double) (samplerows - f1) +
2146                                 (double) f1 *(double) samplerows / totalrows;
2147
2148                         stadistinct = numer / denom;
2149                         /* Clamp to sane range in case of roundoff error */
2150                         if (stadistinct < (double) d)
2151                                 stadistinct = (double) d;
2152                         if (stadistinct > totalrows)
2153                                 stadistinct = totalrows;
2154                         stats->stadistinct = floor(stadistinct + 0.5);
2155                 }
2156
2157                 /*
2158                  * If we estimated the number of distinct values at more than 10% of
2159                  * the total row count (a very arbitrary limit), then assume that
2160                  * stadistinct should scale with the row count rather than be a fixed
2161                  * value.
2162                  */
2163                 if (stats->stadistinct > 0.1 * totalrows)
2164                         stats->stadistinct = -(stats->stadistinct / totalrows);
2165
2166                 /*
2167                  * Decide how many values are worth storing as most-common values. If
2168                  * we are able to generate a complete MCV list (all the values in the
2169                  * sample will fit, and we think these are all the ones in the table),
2170                  * then do so.  Otherwise, store only those values that are
2171                  * significantly more common than the (estimated) average. We set the
2172                  * threshold rather arbitrarily at 25% more than average, with at
2173                  * least 2 instances in the sample.
2174                  */
2175                 if (track_cnt < track_max && toowide_cnt == 0 &&
2176                         stats->stadistinct > 0 &&
2177                         track_cnt <= num_mcv)
2178                 {
2179                         /* Track list includes all values seen, and all will fit */
2180                         num_mcv = track_cnt;
2181                 }
2182                 else
2183                 {
2184                         double          ndistinct = stats->stadistinct;
2185                         double          avgcount,
2186                                                 mincount;
2187
2188                         if (ndistinct < 0)
2189                                 ndistinct = -ndistinct * totalrows;
2190                         /* estimate # of occurrences in sample of a typical value */
2191                         avgcount = (double) samplerows / ndistinct;
2192                         /* set minimum threshold count to store a value */
2193                         mincount = avgcount * 1.25;
2194                         if (mincount < 2)
2195                                 mincount = 2;
2196                         if (num_mcv > track_cnt)
2197                                 num_mcv = track_cnt;
2198                         for (i = 0; i < num_mcv; i++)
2199                         {
2200                                 if (track[i].count < mincount)
2201                                 {
2202                                         num_mcv = i;
2203                                         break;
2204                                 }
2205                         }
2206                 }
2207
2208                 /* Generate MCV slot entry */
2209                 if (num_mcv > 0)
2210                 {
2211                         MemoryContext old_context;
2212                         Datum      *mcv_values;
2213                         float4     *mcv_freqs;
2214
2215                         /* Must copy the target values into anl_context */
2216                         old_context = MemoryContextSwitchTo(stats->anl_context);
2217                         mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
2218                         mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
2219                         for (i = 0; i < num_mcv; i++)
2220                         {
2221                                 mcv_values[i] = datumCopy(track[i].value,
2222                                                                                   stats->attrtype->typbyval,
2223                                                                                   stats->attrtype->typlen);
2224                                 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
2225                         }
2226                         MemoryContextSwitchTo(old_context);
2227
2228                         stats->stakind[0] = STATISTIC_KIND_MCV;
2229                         stats->staop[0] = mystats->eqopr;
2230                         stats->stanumbers[0] = mcv_freqs;
2231                         stats->numnumbers[0] = num_mcv;
2232                         stats->stavalues[0] = mcv_values;
2233                         stats->numvalues[0] = num_mcv;
2234
2235                         /*
2236                          * Accept the defaults for stats->statypid and others. They have
2237                          * been set before we were called (see vacuum.h)
2238                          */
2239                 }
2240         }
2241         else if (null_cnt > 0)
2242         {
2243                 /* We found only nulls; assume the column is entirely null */
2244                 stats->stats_valid = true;
2245                 stats->stanullfrac = 1.0;
2246                 if (is_varwidth)
2247                         stats->stawidth = 0;    /* "unknown" */
2248                 else
2249                         stats->stawidth = stats->attrtype->typlen;
2250                 stats->stadistinct = 0.0;               /* "unknown" */
2251         }
2252
2253         /* We don't need to bother cleaning up any of our temporary palloc's */
2254 }
2255
2256
2257 /*
2258  *      compute_scalar_stats() -- compute column statistics
2259  *
2260  *      We use this when we can find "=" and "<" operators for the datatype.
2261  *
2262  *      We determine the fraction of non-null rows, the average width, the
2263  *      most common values, the (estimated) number of distinct values, the
2264  *      distribution histogram, and the correlation of physical to logical order.
2265  *
2266  *      The desired stats can be determined fairly easily after sorting the
2267  *      data values into order.
2268  */
2269 static void
2270 compute_scalar_stats(VacAttrStatsP stats,
2271                                          AnalyzeAttrFetchFunc fetchfunc,
2272                                          int samplerows,
2273                                          double totalrows)
2274 {
2275         int                     i;
2276         int                     null_cnt = 0;
2277         int                     nonnull_cnt = 0;
2278         int                     toowide_cnt = 0;
2279         double          total_width = 0;
2280         bool            is_varlena = (!stats->attrtype->typbyval &&
2281                                                           stats->attrtype->typlen == -1);
2282         bool            is_varwidth = (!stats->attrtype->typbyval &&
2283                                                            stats->attrtype->typlen < 0);
2284         double          corr_xysum;
2285         SortSupportData ssup;
2286         ScalarItem *values;
2287         int                     values_cnt = 0;
2288         int                *tupnoLink;
2289         ScalarMCVItem *track;
2290         int                     track_cnt = 0;
2291         int                     num_mcv = stats->attr->attstattarget;
2292         int                     num_bins = stats->attr->attstattarget;
2293         StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
2294
2295         values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
2296         tupnoLink = (int *) palloc(samplerows * sizeof(int));
2297         track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
2298
2299         memset(&ssup, 0, sizeof(ssup));
2300         ssup.ssup_cxt = CurrentMemoryContext;
2301         /* We always use the default collation for statistics */
2302         ssup.ssup_collation = DEFAULT_COLLATION_OID;
2303         ssup.ssup_nulls_first = false;
2304         /*
2305          * For now, don't perform abbreviated key conversion, because full values
2306          * are required for MCV slot generation.  Supporting that optimization
2307          * would necessitate teaching compare_scalars() to call a tie-breaker.
2308          */
2309         ssup.abbreviate = false;
2310
2311         PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
2312
2313         /* Initial scan to find sortable values */
2314         for (i = 0; i < samplerows; i++)
2315         {
2316                 Datum           value;
2317                 bool            isnull;
2318
2319                 vacuum_delay_point();
2320
2321                 value = fetchfunc(stats, i, &isnull);
2322
2323                 /* Check for null/nonnull */
2324                 if (isnull)
2325                 {
2326                         null_cnt++;
2327                         continue;
2328                 }
2329                 nonnull_cnt++;
2330
2331                 /*
2332                  * If it's a variable-width field, add up widths for average width
2333                  * calculation.  Note that if the value is toasted, we use the toasted
2334                  * width.  We don't bother with this calculation if it's a fixed-width
2335                  * type.
2336                  */
2337                 if (is_varlena)
2338                 {
2339                         total_width += VARSIZE_ANY(DatumGetPointer(value));
2340
2341                         /*
2342                          * If the value is toasted, we want to detoast it just once to
2343                          * avoid repeated detoastings and resultant excess memory usage
2344                          * during the comparisons.  Also, check to see if the value is
2345                          * excessively wide, and if so don't detoast at all --- just
2346                          * ignore the value.
2347                          */
2348                         if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
2349                         {
2350                                 toowide_cnt++;
2351                                 continue;
2352                         }
2353                         value = PointerGetDatum(PG_DETOAST_DATUM(value));
2354                 }
2355                 else if (is_varwidth)
2356                 {
2357                         /* must be cstring */
2358                         total_width += strlen(DatumGetCString(value)) + 1;
2359                 }
2360
2361                 /* Add it to the list to be sorted */
2362                 values[values_cnt].value = value;
2363                 values[values_cnt].tupno = values_cnt;
2364                 tupnoLink[values_cnt] = values_cnt;
2365                 values_cnt++;
2366         }
2367
2368         /* We can only compute real stats if we found some sortable values. */
2369         if (values_cnt > 0)
2370         {
2371                 int                     ndistinct,      /* # distinct values in sample */
2372                                         nmultiple,      /* # that appear multiple times */
2373                                         num_hist,
2374                                         dups_cnt;
2375                 int                     slot_idx = 0;
2376                 CompareScalarsContext cxt;
2377
2378                 /* Sort the collected values */
2379                 cxt.ssup = &ssup;
2380                 cxt.tupnoLink = tupnoLink;
2381                 qsort_arg((void *) values, values_cnt, sizeof(ScalarItem),
2382                                   compare_scalars, (void *) &cxt);
2383
2384                 /*
2385                  * Now scan the values in order, find the most common ones, and also
2386                  * accumulate ordering-correlation statistics.
2387                  *
2388                  * To determine which are most common, we first have to count the
2389                  * number of duplicates of each value.  The duplicates are adjacent in
2390                  * the sorted list, so a brute-force approach is to compare successive
2391                  * datum values until we find two that are not equal. However, that
2392                  * requires N-1 invocations of the datum comparison routine, which are
2393                  * completely redundant with work that was done during the sort.  (The
2394                  * sort algorithm must at some point have compared each pair of items
2395                  * that are adjacent in the sorted order; otherwise it could not know
2396                  * that it's ordered the pair correctly.) We exploit this by having
2397                  * compare_scalars remember the highest tupno index that each
2398                  * ScalarItem has been found equal to.  At the end of the sort, a
2399                  * ScalarItem's tupnoLink will still point to itself if and only if it
2400                  * is the last item of its group of duplicates (since the group will
2401                  * be ordered by tupno).
2402                  */
2403                 corr_xysum = 0;
2404                 ndistinct = 0;
2405                 nmultiple = 0;
2406                 dups_cnt = 0;
2407                 for (i = 0; i < values_cnt; i++)
2408                 {
2409                         int                     tupno = values[i].tupno;
2410
2411                         corr_xysum += ((double) i) * ((double) tupno);
2412                         dups_cnt++;
2413                         if (tupnoLink[tupno] == tupno)
2414                         {
2415                                 /* Reached end of duplicates of this value */
2416                                 ndistinct++;
2417                                 if (dups_cnt > 1)
2418                                 {
2419                                         nmultiple++;
2420                                         if (track_cnt < num_mcv ||
2421                                                 dups_cnt > track[track_cnt - 1].count)
2422                                         {
2423                                                 /*
2424                                                  * Found a new item for the mcv list; find its
2425                                                  * position, bubbling down old items if needed. Loop
2426                                                  * invariant is that j points at an empty/ replaceable
2427                                                  * slot.
2428                                                  */
2429                                                 int                     j;
2430
2431                                                 if (track_cnt < num_mcv)
2432                                                         track_cnt++;
2433                                                 for (j = track_cnt - 1; j > 0; j--)
2434                                                 {
2435                                                         if (dups_cnt <= track[j - 1].count)
2436                                                                 break;
2437                                                         track[j].count = track[j - 1].count;
2438                                                         track[j].first = track[j - 1].first;
2439                                                 }
2440                                                 track[j].count = dups_cnt;
2441                                                 track[j].first = i + 1 - dups_cnt;
2442                                         }
2443                                 }
2444                                 dups_cnt = 0;
2445                         }
2446                 }
2447
2448                 stats->stats_valid = true;
2449                 /* Do the simple null-frac and width stats */
2450                 stats->stanullfrac = (double) null_cnt / (double) samplerows;
2451                 if (is_varwidth)
2452                         stats->stawidth = total_width / (double) nonnull_cnt;
2453                 else
2454                         stats->stawidth = stats->attrtype->typlen;
2455
2456                 if (nmultiple == 0)
2457                 {
2458                         /* If we found no repeated values, assume it's a unique column */
2459                         stats->stadistinct = -1.0;
2460                 }
2461                 else if (toowide_cnt == 0 && nmultiple == ndistinct)
2462                 {
2463                         /*
2464                          * Every value in the sample appeared more than once.  Assume the
2465                          * column has just these values.
2466                          */
2467                         stats->stadistinct = ndistinct;
2468                 }
2469                 else
2470                 {
2471                         /*----------
2472                          * Estimate the number of distinct values using the estimator
2473                          * proposed by Haas and Stokes in IBM Research Report RJ 10025:
2474                          *              n*d / (n - f1 + f1*n/N)
2475                          * where f1 is the number of distinct values that occurred
2476                          * exactly once in our sample of n rows (from a total of N),
2477                          * and d is the total number of distinct values in the sample.
2478                          * This is their Duj1 estimator; the other estimators they
2479                          * recommend are considerably more complex, and are numerically
2480                          * very unstable when n is much smaller than N.
2481                          *
2482                          * Overwidth values are assumed to have been distinct.
2483                          *----------
2484                          */
2485                         int                     f1 = ndistinct - nmultiple + toowide_cnt;
2486                         int                     d = f1 + nmultiple;
2487                         double          numer,
2488                                                 denom,
2489                                                 stadistinct;
2490
2491                         numer = (double) samplerows *(double) d;
2492
2493                         denom = (double) (samplerows - f1) +
2494                                 (double) f1 *(double) samplerows / totalrows;
2495
2496                         stadistinct = numer / denom;
2497                         /* Clamp to sane range in case of roundoff error */
2498                         if (stadistinct < (double) d)
2499                                 stadistinct = (double) d;
2500                         if (stadistinct > totalrows)
2501                                 stadistinct = totalrows;
2502                         stats->stadistinct = floor(stadistinct + 0.5);
2503                 }
2504
2505                 /*
2506                  * If we estimated the number of distinct values at more than 10% of
2507                  * the total row count (a very arbitrary limit), then assume that
2508                  * stadistinct should scale with the row count rather than be a fixed
2509                  * value.
2510                  */
2511                 if (stats->stadistinct > 0.1 * totalrows)
2512                         stats->stadistinct = -(stats->stadistinct / totalrows);
2513
2514                 /*
2515                  * Decide how many values are worth storing as most-common values. If
2516                  * we are able to generate a complete MCV list (all the values in the
2517                  * sample will fit, and we think these are all the ones in the table),
2518                  * then do so.  Otherwise, store only those values that are
2519                  * significantly more common than the (estimated) average. We set the
2520                  * threshold rather arbitrarily at 25% more than average, with at
2521                  * least 2 instances in the sample.  Also, we won't suppress values
2522                  * that have a frequency of at least 1/K where K is the intended
2523                  * number of histogram bins; such values might otherwise cause us to
2524                  * emit duplicate histogram bin boundaries.  (We might end up with
2525                  * duplicate histogram entries anyway, if the distribution is skewed;
2526                  * but we prefer to treat such values as MCVs if at all possible.)
2527                  */
2528                 if (track_cnt == ndistinct && toowide_cnt == 0 &&
2529                         stats->stadistinct > 0 &&
2530                         track_cnt <= num_mcv)
2531                 {
2532                         /* Track list includes all values seen, and all will fit */
2533                         num_mcv = track_cnt;
2534                 }
2535                 else
2536                 {
2537                         double          ndistinct = stats->stadistinct;
2538                         double          avgcount,
2539                                                 mincount,
2540                                                 maxmincount;
2541
2542                         if (ndistinct < 0)
2543                                 ndistinct = -ndistinct * totalrows;
2544                         /* estimate # of occurrences in sample of a typical value */
2545                         avgcount = (double) samplerows / ndistinct;
2546                         /* set minimum threshold count to store a value */
2547                         mincount = avgcount * 1.25;
2548                         if (mincount < 2)
2549                                 mincount = 2;
2550                         /* don't let threshold exceed 1/K, however */
2551                         maxmincount = (double) samplerows / (double) num_bins;
2552                         if (mincount > maxmincount)
2553                                 mincount = maxmincount;
2554                         if (num_mcv > track_cnt)
2555                                 num_mcv = track_cnt;
2556                         for (i = 0; i < num_mcv; i++)
2557                         {
2558                                 if (track[i].count < mincount)
2559                                 {
2560                                         num_mcv = i;
2561                                         break;
2562                                 }
2563                         }
2564                 }
2565
2566                 /* Generate MCV slot entry */
2567                 if (num_mcv > 0)
2568                 {
2569                         MemoryContext old_context;
2570                         Datum      *mcv_values;
2571                         float4     *mcv_freqs;
2572
2573                         /* Must copy the target values into anl_context */
2574                         old_context = MemoryContextSwitchTo(stats->anl_context);
2575                         mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
2576                         mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
2577                         for (i = 0; i < num_mcv; i++)
2578                         {
2579                                 mcv_values[i] = datumCopy(values[track[i].first].value,
2580                                                                                   stats->attrtype->typbyval,
2581                                                                                   stats->attrtype->typlen);
2582                                 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
2583                         }
2584                         MemoryContextSwitchTo(old_context);
2585
2586                         stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
2587                         stats->staop[slot_idx] = mystats->eqopr;
2588                         stats->stanumbers[slot_idx] = mcv_freqs;
2589                         stats->numnumbers[slot_idx] = num_mcv;
2590                         stats->stavalues[slot_idx] = mcv_values;
2591                         stats->numvalues[slot_idx] = num_mcv;
2592
2593                         /*
2594                          * Accept the defaults for stats->statypid and others. They have
2595                          * been set before we were called (see vacuum.h)
2596                          */
2597                         slot_idx++;
2598                 }
2599
2600                 /*
2601                  * Generate a histogram slot entry if there are at least two distinct
2602                  * values not accounted for in the MCV list.  (This ensures the
2603                  * histogram won't collapse to empty or a singleton.)
2604                  */
2605                 num_hist = ndistinct - num_mcv;
2606                 if (num_hist > num_bins)
2607                         num_hist = num_bins + 1;
2608                 if (num_hist >= 2)
2609                 {
2610                         MemoryContext old_context;
2611                         Datum      *hist_values;
2612                         int                     nvals;
2613                         int                     pos,
2614                                                 posfrac,
2615                                                 delta,
2616                                                 deltafrac;
2617
2618                         /* Sort the MCV items into position order to speed next loop */
2619                         qsort((void *) track, num_mcv,
2620                                   sizeof(ScalarMCVItem), compare_mcvs);
2621
2622                         /*
2623                          * Collapse out the MCV items from the values[] array.
2624                          *
2625                          * Note we destroy the values[] array here... but we don't need it
2626                          * for anything more.  We do, however, still need values_cnt.
2627                          * nvals will be the number of remaining entries in values[].
2628                          */
2629                         if (num_mcv > 0)
2630                         {
2631                                 int                     src,
2632                                                         dest;
2633                                 int                     j;
2634
2635                                 src = dest = 0;
2636                                 j = 0;                  /* index of next interesting MCV item */
2637                                 while (src < values_cnt)
2638                                 {
2639                                         int                     ncopy;
2640
2641                                         if (j < num_mcv)
2642                                         {
2643                                                 int                     first = track[j].first;
2644
2645                                                 if (src >= first)
2646                                                 {
2647                                                         /* advance past this MCV item */
2648                                                         src = first + track[j].count;
2649                                                         j++;
2650                                                         continue;
2651                                                 }
2652                                                 ncopy = first - src;
2653                                         }
2654                                         else
2655                                                 ncopy = values_cnt - src;
2656                                         memmove(&values[dest], &values[src],
2657                                                         ncopy * sizeof(ScalarItem));
2658                                         src += ncopy;
2659                                         dest += ncopy;
2660                                 }
2661                                 nvals = dest;
2662                         }
2663                         else
2664                                 nvals = values_cnt;
2665                         Assert(nvals >= num_hist);
2666
2667                         /* Must copy the target values into anl_context */
2668                         old_context = MemoryContextSwitchTo(stats->anl_context);
2669                         hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
2670
2671                         /*
2672                          * The object of this loop is to copy the first and last values[]
2673                          * entries along with evenly-spaced values in between.  So the
2674                          * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)].  But
2675                          * computing that subscript directly risks integer overflow when
2676                          * the stats target is more than a couple thousand.  Instead we
2677                          * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
2678                          * the integral and fractional parts of the sum separately.
2679                          */
2680                         delta = (nvals - 1) / (num_hist - 1);
2681                         deltafrac = (nvals - 1) % (num_hist - 1);
2682                         pos = posfrac = 0;
2683
2684                         for (i = 0; i < num_hist; i++)
2685                         {
2686                                 hist_values[i] = datumCopy(values[pos].value,
2687                                                                                    stats->attrtype->typbyval,
2688                                                                                    stats->attrtype->typlen);
2689                                 pos += delta;
2690                                 posfrac += deltafrac;
2691                                 if (posfrac >= (num_hist - 1))
2692                                 {
2693                                         /* fractional part exceeds 1, carry to integer part */
2694                                         pos++;
2695                                         posfrac -= (num_hist - 1);
2696                                 }
2697                         }
2698
2699                         MemoryContextSwitchTo(old_context);
2700
2701                         stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
2702                         stats->staop[slot_idx] = mystats->ltopr;
2703                         stats->stavalues[slot_idx] = hist_values;
2704                         stats->numvalues[slot_idx] = num_hist;
2705
2706                         /*
2707                          * Accept the defaults for stats->statypid and others. They have
2708                          * been set before we were called (see vacuum.h)
2709                          */
2710                         slot_idx++;
2711                 }
2712
2713                 /* Generate a correlation entry if there are multiple values */
2714                 if (values_cnt > 1)
2715                 {
2716                         MemoryContext old_context;
2717                         float4     *corrs;
2718                         double          corr_xsum,
2719                                                 corr_x2sum;
2720
2721                         /* Must copy the target values into anl_context */
2722                         old_context = MemoryContextSwitchTo(stats->anl_context);
2723                         corrs = (float4 *) palloc(sizeof(float4));
2724                         MemoryContextSwitchTo(old_context);
2725
2726                         /*----------
2727                          * Since we know the x and y value sets are both
2728                          *              0, 1, ..., values_cnt-1
2729                          * we have sum(x) = sum(y) =
2730                          *              (values_cnt-1)*values_cnt / 2
2731                          * and sum(x^2) = sum(y^2) =
2732                          *              (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
2733                          *----------
2734                          */
2735                         corr_xsum = ((double) (values_cnt - 1)) *
2736                                 ((double) values_cnt) / 2.0;
2737                         corr_x2sum = ((double) (values_cnt - 1)) *
2738                                 ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
2739
2740                         /* And the correlation coefficient reduces to */
2741                         corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
2742                                 (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
2743
2744                         stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
2745                         stats->staop[slot_idx] = mystats->ltopr;
2746                         stats->stanumbers[slot_idx] = corrs;
2747                         stats->numnumbers[slot_idx] = 1;
2748                         slot_idx++;
2749                 }
2750         }
2751         else if (nonnull_cnt > 0)
2752         {
2753                 /* We found some non-null values, but they were all too wide */
2754                 Assert(nonnull_cnt == toowide_cnt);
2755                 stats->stats_valid = true;
2756                 /* Do the simple null-frac and width stats */
2757                 stats->stanullfrac = (double) null_cnt / (double) samplerows;
2758                 if (is_varwidth)
2759                         stats->stawidth = total_width / (double) nonnull_cnt;
2760                 else
2761                         stats->stawidth = stats->attrtype->typlen;
2762                 /* Assume all too-wide values are distinct, so it's a unique column */
2763                 stats->stadistinct = -1.0;
2764         }
2765         else if (null_cnt > 0)
2766         {
2767                 /* We found only nulls; assume the column is entirely null */
2768                 stats->stats_valid = true;
2769                 stats->stanullfrac = 1.0;
2770                 if (is_varwidth)
2771                         stats->stawidth = 0;    /* "unknown" */
2772                 else
2773                         stats->stawidth = stats->attrtype->typlen;
2774                 stats->stadistinct = 0.0;               /* "unknown" */
2775         }
2776
2777         /* We don't need to bother cleaning up any of our temporary palloc's */
2778 }
2779
2780 /*
2781  * qsort_arg comparator for sorting ScalarItems
2782  *
2783  * Aside from sorting the items, we update the tupnoLink[] array
2784  * whenever two ScalarItems are found to contain equal datums.  The array
2785  * is indexed by tupno; for each ScalarItem, it contains the highest
2786  * tupno that that item's datum has been found to be equal to.  This allows
2787  * us to avoid additional comparisons in compute_scalar_stats().
2788  */
2789 static int
2790 compare_scalars(const void *a, const void *b, void *arg)
2791 {
2792         Datum           da = ((const ScalarItem *) a)->value;
2793         int                     ta = ((const ScalarItem *) a)->tupno;
2794         Datum           db = ((const ScalarItem *) b)->value;
2795         int                     tb = ((const ScalarItem *) b)->tupno;
2796         CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
2797         int                     compare;
2798
2799         compare = ApplySortComparator(da, false, db, false, cxt->ssup);
2800         if (compare != 0)
2801                 return compare;
2802
2803         /*
2804          * The two datums are equal, so update cxt->tupnoLink[].
2805          */
2806         if (cxt->tupnoLink[ta] < tb)
2807                 cxt->tupnoLink[ta] = tb;
2808         if (cxt->tupnoLink[tb] < ta)
2809                 cxt->tupnoLink[tb] = ta;
2810
2811         /*
2812          * For equal datums, sort by tupno
2813          */
2814         return ta - tb;
2815 }
2816
2817 /*
2818  * qsort comparator for sorting ScalarMCVItems by position
2819  */
2820 static int
2821 compare_mcvs(const void *a, const void *b)
2822 {
2823         int                     da = ((const ScalarMCVItem *) a)->first;
2824         int                     db = ((const ScalarMCVItem *) b)->first;
2825
2826         return da - db;
2827 }