Update copyright for 2016

[postgresql] / src / backend / tsearch / ts_typanalyze.c
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index e97e22bc260b358053e5eb38f7b48ea3878ff9d1..0f851ead0607fcecb1fd5516593d65f78b1665e4 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -3,11 +3,11 @@
   * ts_typanalyze.c
   *       functions for gathering statistics from tsvector columns
   *
- * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.9 2010/05/30 21:59:02 tgl Exp $
+ *       src/backend/tsearch/ts_typanalyze.c
   *
   *-------------------------------------------------------------------------
   */
@@ -18,7 +18,6 @@
  #include "commands/vacuum.h"
  #include "tsearch/ts_type.h"
  #include "utils/builtins.h"
-#include "utils/hsearch.h"
  
  
  /* A hash key for lexemes */
@@ -164,8 +163,8 @@ compute_tsvector_stats(VacAttrStats *stats,
         /*
          * We want statistics_target * 10 lexemes in the MCELEM array.  This
          * multiplier is pretty arbitrary, but is meant to reflect the fact that
-        * the number of individual lexeme values tracked in pg_statistic ought
-        * to be more than the number of values for a simple scalar column.
+        * the number of individual lexeme values tracked in pg_statistic ought to
+        * be more than the number of values for a simple scalar column.
          */
         num_mcelem = stats->attr->attstattarget * 10;
  
@@ -187,7 +186,7 @@ compute_tsvector_stats(VacAttrStats *stats,
         hash_ctl.match = lexeme_match;
         hash_ctl.hcxt = CurrentMemoryContext;
         lexemes_tab = hash_create("Analyzed lexemes table",
-                                                         bucket_width * 7,
+                                                         num_mcelem,
                                                           &hash_ctl,
                                         HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
  
@@ -233,7 +232,7 @@ compute_tsvector_stats(VacAttrStats *stats,
  
                 /*
                  * We loop through the lexemes in the tsvector and add them to our
-                * tracking hashtable.  Note: the hashtable entries will point into
+                * tracking hashtable.  Note: the hashtable entries will point into
                  * the (detoasted) tsvector value, therefore we cannot free that
                  * storage until we're done.
                  */
@@ -308,7 +307,7 @@ compute_tsvector_stats(VacAttrStats *stats,
                  */
                 cutoff_freq = 9 * lexeme_no / bucket_width;
  
-               i = hash_get_num_entries(lexemes_tab);          /* surely enough space */
+               i = hash_get_num_entries(lexemes_tab);  /* surely enough space */
                 sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i);
  
                 hash_seq_init(&scan_status, lexemes_tab);
@@ -332,9 +331,9 @@ compute_tsvector_stats(VacAttrStats *stats,
                          num_mcelem, bucket_width, lexeme_no, i, track_len);
  
                 /*
-                * If we obtained more lexemes than we really want, get rid of
-                * those with least frequencies.  The easiest way is to qsort the
-                * array into descending frequency order and truncate the array.
+                * If we obtained more lexemes than we really want, get rid of those
+                * with least frequencies.  The easiest way is to qsort the array into
+                * descending frequency order and truncate the array.
                  */
                 if (num_mcelem < track_len)
                 {
@@ -364,7 +363,7 @@ compute_tsvector_stats(VacAttrStats *stats,
                          * they get sorted on frequencies. The rationale is that we
                          * usually search through most common elements looking for a
                          * specific value, so we can grab its frequency.  When values are
-                        * presorted we can employ binary search for that.      See
+                        * presorted we can employ binary search for that.  See
                          * ts_selfuncs.c for a real usage scenario.
                          */
                         qsort(sort_table, num_mcelem, sizeof(TrackItem *),
@@ -378,13 +377,18 @@ compute_tsvector_stats(VacAttrStats *stats,
                          * able to find out the minimal and maximal frequency without
                          * going through all the values.  We keep those two extra
                          * frequencies in two extra cells in mcelem_freqs.
+                        *
+                        * (Note: the MCELEM statistics slot definition allows for a third
+                        * extra number containing the frequency of nulls, but we don't
+                        * create that for a tsvector column, since null elements aren't
+                        * possible.)
                          */
                         mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
                         mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));
  
                         /*
-                        * See comments above about use of nonnull_cnt as the divisor
-                        * for the final frequency estimates.
+                        * See comments above about use of nonnull_cnt as the divisor for
+                        * the final frequency estimates.
                          */
                         for (i = 0; i < num_mcelem; i++)
                         {