granicus.if.org Git - postgresql/blob - contrib/file_fdw/file_fdw.c
TABLESAMPLE, SQL Standard and extensible
[postgresql] / contrib / file_fdw / file_fdw.c
1 /*-------------------------------------------------------------------------
2  *
3  * file_fdw.c
4  *                foreign-data wrapper for server-side flat files.
5  *
6  * Copyright (c) 2010-2015, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *                contrib/file_fdw/file_fdw.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14
15 #include <sys/stat.h>
16 #include <unistd.h>
17
18 #include "access/htup_details.h"
19 #include "access/reloptions.h"
20 #include "access/sysattr.h"
21 #include "catalog/pg_foreign_table.h"
22 #include "commands/copy.h"
23 #include "commands/defrem.h"
24 #include "commands/explain.h"
25 #include "commands/vacuum.h"
26 #include "foreign/fdwapi.h"
27 #include "foreign/foreign.h"
28 #include "miscadmin.h"
29 #include "nodes/makefuncs.h"
30 #include "optimizer/cost.h"
31 #include "optimizer/pathnode.h"
32 #include "optimizer/planmain.h"
33 #include "optimizer/restrictinfo.h"
34 #include "optimizer/var.h"
35 #include "utils/memutils.h"
36 #include "utils/rel.h"
37 #include "utils/sampling.h"
38
39 PG_MODULE_MAGIC;
40
41 /*
42  * Describes one valid option for objects that use this wrapper: the option
    * name paired with the catalog whose objects may carry it.  The
    * valid_options[] table is an array of these, ended by an entry whose
    * optname is NULL.
43  */
44 struct FileFdwOption
45 {
46         const char *optname;            /* option name; NULL marks the end of the table */
47         Oid                     optcontext;             /* Oid of catalog in which option may appear */
48 };
49
50 /*
51  * Valid options for file_fdw.
52  * These options are based on the options for the COPY FROM command.
53  * But note that force_not_null and force_null are handled as boolean options
54  * attached to a column, not as table options.
55  *
    * Each entry names an option and the catalog (ForeignTableRelationId or
    * AttributeRelationId) in which it is accepted; is_valid_option() and the
    * validator's error hint both walk this array until they reach the entry
    * with a NULL optname.
    *
56  * Note: If you are adding new option for user mapping, you need to modify
57  * fileGetOptions(), which currently doesn't bother to look at user mappings.
58  */
59 static const struct FileFdwOption valid_options[] = {
60         /* File options */
61         {"filename", ForeignTableRelationId},
62
63         /* Format options */
64         /* oids option is not supported */
65         {"format", ForeignTableRelationId},
66         {"header", ForeignTableRelationId},
67         {"delimiter", ForeignTableRelationId},
68         {"quote", ForeignTableRelationId},
69         {"escape", ForeignTableRelationId},
70         {"null", ForeignTableRelationId},
71         {"encoding", ForeignTableRelationId},
           /* Per-column boolean options */
72         {"force_not_null", AttributeRelationId},
73         {"force_null", AttributeRelationId},
74
75         /*
76          * force_quote is not supported by file_fdw because it's for COPY TO.
77          */
78
79         /* Sentinel: the NULL optname terminates scans of this array */
80         {NULL, InvalidOid}
81 };
82
83 /*
84  * FDW-specific information for RelOptInfo.fdw_private.
    *
    * Allocated in fileGetForeignRelSize(), which fills filename and options
    * via fileGetOptions() and then hands the struct to estimate_size().
    * NOTE(review): pages and ntuples are presumably filled in by
    * estimate_size(); confirm against that function.
85  */
86 typedef struct FileFdwPlanState
87 {
88         char       *filename;           /* file to read */
89         List       *options;            /* merged COPY options, excluding filename */
90         BlockNumber pages;                      /* estimate of file's physical size */
91         double          ntuples;                /* estimate of number of rows in file */
92 } FileFdwPlanState;
93
94 /*
95  * FDW-specific information for ForeignScanState.fdw_state.
    *
    * Built by fileBeginForeignScan().  filename and options are retained so
    * that fileReScanForeignScan() can call BeginCopyFrom() again with the
    * same arguments; cstate is the currently open COPY state, closed by
    * fileEndForeignScan().
96  */
97 typedef struct FileFdwExecutionState
98 {
99         char       *filename;           /* file to read */
100         List       *options;            /* merged COPY options, excluding filename */
101         CopyState       cstate;                 /* state of reading file */
102 } FileFdwExecutionState;
103
104 /*
105  * SQL functions
106  */
107 PG_FUNCTION_INFO_V1(file_fdw_handler);
108 PG_FUNCTION_INFO_V1(file_fdw_validator);
109
110 /*
111  * FDW callback routines
112  */
113 static void fileGetForeignRelSize(PlannerInfo *root,
114                                           RelOptInfo *baserel,
115                                           Oid foreigntableid);
116 static void fileGetForeignPaths(PlannerInfo *root,
117                                         RelOptInfo *baserel,
118                                         Oid foreigntableid);
119 static ForeignScan *fileGetForeignPlan(PlannerInfo *root,
120                                    RelOptInfo *baserel,
121                                    Oid foreigntableid,
122                                    ForeignPath *best_path,
123                                    List *tlist,
124                                    List *scan_clauses);
125 static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es);
126 static void fileBeginForeignScan(ForeignScanState *node, int eflags);
127 static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
128 static void fileReScanForeignScan(ForeignScanState *node);
129 static void fileEndForeignScan(ForeignScanState *node);
130 static bool fileAnalyzeForeignTable(Relation relation,
131                                                 AcquireSampleRowsFunc *func,
132                                                 BlockNumber *totalpages);
133
134 /*
135  * Helper functions
136  */
137 static bool is_valid_option(const char *option, Oid context);
138 static void fileGetOptions(Oid foreigntableid,
139                            char **filename, List **other_options);
140 static List *get_file_fdw_attribute_options(Oid relid);
141 static bool check_selective_binary_conversion(RelOptInfo *baserel,
142                                                                   Oid foreigntableid,
143                                                                   List **columns);
144 static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
145                           FileFdwPlanState *fdw_private);
146 static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
147                            FileFdwPlanState *fdw_private,
148                            Cost *startup_cost, Cost *total_cost);
149 static int file_acquire_sample_rows(Relation onerel, int elevel,
150                                                  HeapTuple *rows, int targrows,
151                                                  double *totalrows, double *totaldeadrows);
152
153
154 /*
155  * Foreign-data wrapper handler function: return a struct with pointers
156  * to my callback routines.
157  */
158 Datum
159 file_fdw_handler(PG_FUNCTION_ARGS)
160 {
161         FdwRoutine *fdwroutine = makeNode(FdwRoutine);
162
163         fdwroutine->GetForeignRelSize = fileGetForeignRelSize;
164         fdwroutine->GetForeignPaths = fileGetForeignPaths;
165         fdwroutine->GetForeignPlan = fileGetForeignPlan;
166         fdwroutine->ExplainForeignScan = fileExplainForeignScan;
167         fdwroutine->BeginForeignScan = fileBeginForeignScan;
168         fdwroutine->IterateForeignScan = fileIterateForeignScan;
169         fdwroutine->ReScanForeignScan = fileReScanForeignScan;
170         fdwroutine->EndForeignScan = fileEndForeignScan;
171         fdwroutine->AnalyzeForeignTable = fileAnalyzeForeignTable;
172
173         PG_RETURN_POINTER(fdwroutine);
174 }
175
176 /*
177  * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER,
178  * USER MAPPING or FOREIGN TABLE that uses file_fdw.
179  *
180  * Raise an ERROR if the option or its value is considered invalid.
181  */
182 Datum
183 file_fdw_validator(PG_FUNCTION_ARGS)
184 {
185         List       *options_list = untransformRelOptions(PG_GETARG_DATUM(0));
186         Oid                     catalog = PG_GETARG_OID(1);
187         char       *filename = NULL;
188         DefElem    *force_not_null = NULL;
189         DefElem    *force_null = NULL;
190         List       *other_options = NIL;
191         ListCell   *cell;
192
193         /*
194          * Only superusers are allowed to set options of a file_fdw foreign table.
195          * This is because the filename is one of those options, and we don't want
196          * non-superusers to be able to determine which file gets read.
197          *
198          * Putting this sort of permissions check in a validator is a bit of a
199          * crock, but there doesn't seem to be any other place that can enforce
200          * the check more cleanly.
201          *
202          * Note that the valid_options[] array disallows setting filename at any
203          * options level other than foreign table --- otherwise there'd still be a
204          * security hole.
205          */
206         if (catalog == ForeignTableRelationId && !superuser())
207                 ereport(ERROR,
208                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
209                                  errmsg("only superuser can change options of a file_fdw foreign table")));
210
211         /*
212          * Check that only options supported by file_fdw, and allowed for the
213          * current object type, are given.
214          */
215         foreach(cell, options_list)
216         {
217                 DefElem    *def = (DefElem *) lfirst(cell);
218
219                 if (!is_valid_option(def->defname, catalog))
220                 {
221                         const struct FileFdwOption *opt;
222                         StringInfoData buf;
223
224                         /*
225                          * Unknown option specified, complain about it. Provide a hint
226                          * with list of valid options for the object.
227                          */
228                         initStringInfo(&buf);
229                         for (opt = valid_options; opt->optname; opt++)
230                         {
231                                 if (catalog == opt->optcontext)
232                                         appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "",
233                                                                          opt->optname);
234                         }
235
236                         ereport(ERROR,
237                                         (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
238                                          errmsg("invalid option \"%s\"", def->defname),
239                                          buf.len > 0
240                                          ? errhint("Valid options in this context are: %s",
241                                                            buf.data)
242                                   : errhint("There are no valid options in this context.")));
243                 }
244
245                 /*
246                  * Separate out filename and column-specific options, since
247                  * ProcessCopyOptions won't accept them.
248                  */
249
250                 if (strcmp(def->defname, "filename") == 0)
251                 {
252                         if (filename)
253                                 ereport(ERROR,
254                                                 (errcode(ERRCODE_SYNTAX_ERROR),
255                                                  errmsg("conflicting or redundant options")));
256                         filename = defGetString(def);
257                 }
258
259                 /*
260                  * force_not_null is a boolean option; after validation we can discard
261                  * it - it will be retrieved later in get_file_fdw_attribute_options()
262                  */
263                 else if (strcmp(def->defname, "force_not_null") == 0)
264                 {
265                         if (force_not_null)
266                                 ereport(ERROR,
267                                                 (errcode(ERRCODE_SYNTAX_ERROR),
268                                                  errmsg("conflicting or redundant options"),
269                                                  errhint("option \"force_not_null\" supplied more than once for a column")));
270                         force_not_null = def;
271                         /* Don't care what the value is, as long as it's a legal boolean */
272                         (void) defGetBoolean(def);
273                 }
274                 /* See comments for force_not_null above */
275                 else if (strcmp(def->defname, "force_null") == 0)
276                 {
277                         if (force_null)
278                                 ereport(ERROR,
279                                                 (errcode(ERRCODE_SYNTAX_ERROR),
280                                                  errmsg("conflicting or redundant options"),
281                                                  errhint("option \"force_null\" supplied more than once for a column")));
282                         force_null = def;
283                         (void) defGetBoolean(def);
284                 }
285                 else
286                         other_options = lappend(other_options, def);
287         }
288
289         /*
290          * Now apply the core COPY code's validation logic for more checks.
291          */
292         ProcessCopyOptions(NULL, true, other_options);
293
294         /*
295          * Filename option is required for file_fdw foreign tables.
296          */
297         if (catalog == ForeignTableRelationId && filename == NULL)
298                 ereport(ERROR,
299                                 (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
300                                  errmsg("filename is required for file_fdw foreign tables")));
301
302         PG_RETURN_VOID();
303 }
304
305 /*
306  * Check if the provided option is one of the valid options.
307  * context is the Oid of the catalog holding the object the option is for.
308  */
309 static bool
310 is_valid_option(const char *option, Oid context)
311 {
312         const struct FileFdwOption *opt;
313
314         for (opt = valid_options; opt->optname; opt++)
315         {
316                 if (context == opt->optcontext && strcmp(opt->optname, option) == 0)
317                         return true;
318         }
319         return false;
320 }
321
322 /*
323  * Fetch the options for a file_fdw foreign table.
324  *
325  * We have to separate out "filename" from the other options because
326  * it must not appear in the options list passed to the core COPY code.
327  */
328 static void
329 fileGetOptions(Oid foreigntableid,
330                            char **filename, List **other_options)
331 {
332         ForeignTable *table;
333         ForeignServer *server;
334         ForeignDataWrapper *wrapper;
335         List       *options;
336         ListCell   *lc,
337                            *prev;
338
339         /*
340          * Extract options from FDW objects.  We ignore user mappings because
341          * file_fdw doesn't have any options that can be specified there.
342          *
343          * (XXX Actually, given the current contents of valid_options[], there's
344          * no point in examining anything except the foreign table's own options.
345          * Simplify?)
346          */
347         table = GetForeignTable(foreigntableid);
348         server = GetForeignServer(table->serverid);
349         wrapper = GetForeignDataWrapper(server->fdwid);
350
351         options = NIL;
352         options = list_concat(options, wrapper->options);
353         options = list_concat(options, server->options);
354         options = list_concat(options, table->options);
355         options = list_concat(options, get_file_fdw_attribute_options(foreigntableid));
356
357         /*
358          * Separate out the filename.
359          */
360         *filename = NULL;
361         prev = NULL;
362         foreach(lc, options)
363         {
364                 DefElem    *def = (DefElem *) lfirst(lc);
365
366                 if (strcmp(def->defname, "filename") == 0)
367                 {
368                         *filename = defGetString(def);
369                         options = list_delete_cell(options, lc, prev);
370                         break;
371                 }
372                 prev = lc;
373         }
374
375         /*
376          * The validator should have checked that a filename was included in the
377          * options, but check again, just in case.
378          */
379         if (*filename == NULL)
380                 elog(ERROR, "filename is required for file_fdw foreign tables");
381
382         *other_options = options;
383 }
384
385 /*
386  * Retrieve per-column generic options from pg_attribute and construct a list
387  * of DefElems representing them.
388  *
389  * At the moment we only have "force_not_null", and "force_null",
390  * which should each be combined into a single DefElem listing all such
391  * columns, since that's what COPY expects.
392  */
393 static List *
394 get_file_fdw_attribute_options(Oid relid)
395 {
396         Relation        rel;
397         TupleDesc       tupleDesc;
398         AttrNumber      natts;
399         AttrNumber      attnum;
400         List       *fnncolumns = NIL;
401         List       *fncolumns = NIL;
402
403         List       *options = NIL;
404
405         rel = heap_open(relid, AccessShareLock);
406         tupleDesc = RelationGetDescr(rel);
407         natts = tupleDesc->natts;
408
409         /* Retrieve FDW options for all user-defined attributes. */
410         for (attnum = 1; attnum <= natts; attnum++)
411         {
412                 Form_pg_attribute attr = tupleDesc->attrs[attnum - 1];
413                 List       *options;
414                 ListCell   *lc;
415
416                 /* Skip dropped attributes. */
417                 if (attr->attisdropped)
418                         continue;
419
420                 options = GetForeignColumnOptions(relid, attnum);
421                 foreach(lc, options)
422                 {
423                         DefElem    *def = (DefElem *) lfirst(lc);
424
425                         if (strcmp(def->defname, "force_not_null") == 0)
426                         {
427                                 if (defGetBoolean(def))
428                                 {
429                                         char       *attname = pstrdup(NameStr(attr->attname));
430
431                                         fnncolumns = lappend(fnncolumns, makeString(attname));
432                                 }
433                         }
434                         else if (strcmp(def->defname, "force_null") == 0)
435                         {
436                                 if (defGetBoolean(def))
437                                 {
438                                         char       *attname = pstrdup(NameStr(attr->attname));
439
440                                         fncolumns = lappend(fncolumns, makeString(attname));
441                                 }
442                         }
443                         /* maybe in future handle other options here */
444                 }
445         }
446
447         heap_close(rel, AccessShareLock);
448
449         /*
450          * Return DefElem only when some column(s) have force_not_null /
451          * force_null options set
452          */
453         if (fnncolumns != NIL)
454                 options = lappend(options, makeDefElem("force_not_null", (Node *) fnncolumns));
455
456         if (fncolumns != NIL)
457                 options = lappend(options, makeDefElem("force_null", (Node *) fncolumns));
458
459         return options;
460 }
461
462 /*
463  * fileGetForeignRelSize
464  *              Obtain relation size estimates for a foreign table
465  */
466 static void
467 fileGetForeignRelSize(PlannerInfo *root,
468                                           RelOptInfo *baserel,
469                                           Oid foreigntableid)
470 {
471         FileFdwPlanState *fdw_private;
472
473         /*
474          * Fetch options.  We only need filename at this point, but we might as
475          * well get everything and not need to re-fetch it later in planning.
476          */
477         fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState));
478         fileGetOptions(foreigntableid,
479                                    &fdw_private->filename, &fdw_private->options);
480         baserel->fdw_private = (void *) fdw_private;
481
482         /* Estimate relation size */
483         estimate_size(root, baserel, fdw_private);
484 }
485
486 /*
487  * fileGetForeignPaths
488  *              Create possible access paths for a scan on the foreign table
489  *
490  *              Currently we don't support any push-down feature, so there is only one
491  *              possible access path, which simply returns all records in the order in
492  *              the data file.
493  */
494 static void
495 fileGetForeignPaths(PlannerInfo *root,
496                                         RelOptInfo *baserel,
497                                         Oid foreigntableid)
498 {
499         FileFdwPlanState *fdw_private = (FileFdwPlanState *) baserel->fdw_private;
500         Cost            startup_cost;
501         Cost            total_cost;
502         List       *columns;
503         List       *coptions = NIL;
504
505         /* Decide whether to selectively perform binary conversion */
506         if (check_selective_binary_conversion(baserel,
507                                                                                   foreigntableid,
508                                                                                   &columns))
509                 coptions = list_make1(makeDefElem("convert_selectively",
510                                                                                   (Node *) columns));
511
512         /* Estimate costs */
513         estimate_costs(root, baserel, fdw_private,
514                                    &startup_cost, &total_cost);
515
516         /*
517          * Create a ForeignPath node and add it as only possible path.  We use the
518          * fdw_private list of the path to carry the convert_selectively option;
519          * it will be propagated into the fdw_private list of the Plan node.
520          */
521         add_path(baserel, (Path *)
522                          create_foreignscan_path(root, baserel,
523                                                                          baserel->rows,
524                                                                          startup_cost,
525                                                                          total_cost,
526                                                                          NIL,           /* no pathkeys */
527                                                                          NULL,          /* no outer rel either */
528                                                                          coptions));
529
530         /*
531          * If data file was sorted, and we knew it somehow, we could insert
532          * appropriate pathkeys into the ForeignPath node to tell the planner
533          * that.
534          */
535 }
536
537 /*
538  * fileGetForeignPlan
539  *              Create a ForeignScan plan node for scanning the foreign table
540  */
541 static ForeignScan *
542 fileGetForeignPlan(PlannerInfo *root,
543                                    RelOptInfo *baserel,
544                                    Oid foreigntableid,
545                                    ForeignPath *best_path,
546                                    List *tlist,
547                                    List *scan_clauses)
548 {
549         Index           scan_relid = baserel->relid;
550
551         /*
552          * We have no native ability to evaluate restriction clauses, so we just
553          * put all the scan_clauses into the plan node's qual list for the
554          * executor to check.  So all we have to do here is strip RestrictInfo
555          * nodes from the clauses and ignore pseudoconstants (which will be
556          * handled elsewhere).
557          */
558         scan_clauses = extract_actual_clauses(scan_clauses, false);
559
560         /* Create the ForeignScan node */
561         return make_foreignscan(tlist,
562                                                         scan_clauses,
563                                                         scan_relid,
564                                                         NIL,    /* no expressions to evaluate */
565                                                         best_path->fdw_private,
566                                                         NIL /* no custom tlist */ );
567 }
568
569 /*
570  * fileExplainForeignScan
571  *              Produce extra output for EXPLAIN
572  */
573 static void
574 fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
575 {
576         char       *filename;
577         List       *options;
578
579         /* Fetch options --- we only need filename at this point */
580         fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
581                                    &filename, &options);
582
583         ExplainPropertyText("Foreign File", filename, es);
584
585         /* Suppress file size if we're not showing cost details */
586         if (es->costs)
587         {
588                 struct stat stat_buf;
589
590                 if (stat(filename, &stat_buf) == 0)
591                         ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size,
592                                                                 es);
593         }
594 }
595
596 /*
597  * fileBeginForeignScan
598  *              Initiate access to the file by creating CopyState
599  */
600 static void
601 fileBeginForeignScan(ForeignScanState *node, int eflags)
602 {
603         ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
604         char       *filename;
605         List       *options;
606         CopyState       cstate;
607         FileFdwExecutionState *festate;
608
609         /*
610          * Do nothing in EXPLAIN (no ANALYZE) case.  node->fdw_state stays NULL.
611          */
612         if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
613                 return;
614
615         /* Fetch options of foreign table */
616         fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
617                                    &filename, &options);
618
619         /* Add any options from the plan (currently only convert_selectively) */
620         options = list_concat(options, plan->fdw_private);
621
622         /*
623          * Create CopyState from FDW options.  We always acquire all columns, so
624          * as to match the expected ScanTupleSlot signature.
625          */
626         cstate = BeginCopyFrom(node->ss.ss_currentRelation,
627                                                    filename,
628                                                    false,
629                                                    NIL,
630                                                    options);
631
632         /*
633          * Save state in node->fdw_state.  We must save enough information to call
634          * BeginCopyFrom() again.
635          */
636         festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
637         festate->filename = filename;
638         festate->options = options;
639         festate->cstate = cstate;
640
641         node->fdw_state = (void *) festate;
642 }
643
644 /*
645  * fileIterateForeignScan
646  *              Read next record from the data file and store it into the
647  *              ScanTupleSlot as a virtual tuple
648  */
649 static TupleTableSlot *
650 fileIterateForeignScan(ForeignScanState *node)
651 {
652         FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
653         TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
654         bool            found;
655         ErrorContextCallback errcallback;
656
657         /* Set up callback to identify error line number. */
658         errcallback.callback = CopyFromErrorCallback;
659         errcallback.arg = (void *) festate->cstate;
660         errcallback.previous = error_context_stack;
661         error_context_stack = &errcallback;
662
663         /*
664          * The protocol for loading a virtual tuple into a slot is first
665          * ExecClearTuple, then fill the values/isnull arrays, then
666          * ExecStoreVirtualTuple.  If we don't find another row in the file, we
667          * just skip the last step, leaving the slot empty as required.
668          *
669          * We can pass ExprContext = NULL because we read all columns from the
670          * file, so no need to evaluate default expressions.
671          *
672          * We can also pass tupleOid = NULL because we don't allow oids for
673          * foreign tables.
674          */
675         ExecClearTuple(slot);
676         found = NextCopyFrom(festate->cstate, NULL,
677                                                  slot->tts_values, slot->tts_isnull,
678                                                  NULL);
679         if (found)
680                 ExecStoreVirtualTuple(slot);
681
682         /* Remove error callback. */
683         error_context_stack = errcallback.previous;
684
685         return slot;
686 }
687
688 /*
689  * fileReScanForeignScan
690  *              Rescan table, possibly with new parameters
691  */
692 static void
693 fileReScanForeignScan(ForeignScanState *node)
694 {
695         FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
696
697         EndCopyFrom(festate->cstate);
698
699         festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
700                                                                         festate->filename,
701                                                                         false,
702                                                                         NIL,
703                                                                         festate->options);
704 }
705
706 /*
707  * fileEndForeignScan
708  *              Finish scanning foreign table and dispose objects used for this scan
709  */
710 static void
711 fileEndForeignScan(ForeignScanState *node)
712 {
713         FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
714
715         /* if festate is NULL, we are in EXPLAIN; nothing to do */
716         if (festate)
717                 EndCopyFrom(festate->cstate);
718 }
719
720 /*
721  * fileAnalyzeForeignTable
722  *              Test whether analyzing this foreign table is supported
723  */
724 static bool
725 fileAnalyzeForeignTable(Relation relation,
726                                                 AcquireSampleRowsFunc *func,
727                                                 BlockNumber *totalpages)
728 {
729         char       *filename;
730         List       *options;
731         struct stat stat_buf;
732
733         /* Fetch options of foreign table */
734         fileGetOptions(RelationGetRelid(relation), &filename, &options);
735
736         /*
737          * Get size of the file.  (XXX if we fail here, would it be better to just
738          * return false to skip analyzing the table?)
739          */
740         if (stat(filename, &stat_buf) < 0)
741                 ereport(ERROR,
742                                 (errcode_for_file_access(),
743                                  errmsg("could not stat file \"%s\": %m",
744                                                 filename)));
745
746         /*
747          * Convert size to pages.  Must return at least 1 so that we can tell
748          * later on that pg_class.relpages is not default.
749          */
750         *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
751         if (*totalpages < 1)
752                 *totalpages = 1;
753
754         *func = file_acquire_sample_rows;
755
756         return true;
757 }
758
759 /*
760  * check_selective_binary_conversion
761  *
762  * Check to see if it's useful to convert only a subset of the file's columns
763  * to binary.  If so, construct a list of the column names to be converted,
764  * return that at *columns, and return TRUE.  (Note that it's possible to
765  * determine that no columns need be converted, for instance with a COUNT(*)
766  * query.  So we can't use returning a NIL list to indicate failure.)
767  */
768 static bool
769 check_selective_binary_conversion(RelOptInfo *baserel,
770                                                                   Oid foreigntableid,
771                                                                   List **columns)
772 {
773         ForeignTable *table;
774         ListCell   *lc;
775         Relation        rel;
776         TupleDesc       tupleDesc;
777         AttrNumber      attnum;
778         Bitmapset  *attrs_used = NULL;
779         bool            has_wholerow = false;
780         int                     numattrs;
781         int                     i;
782
783         *columns = NIL;                         /* default result */
784
785         /*
786          * Check format of the file.  If binary format, this is irrelevant.
787          */
788         table = GetForeignTable(foreigntableid);
789         foreach(lc, table->options)
790         {
791                 DefElem    *def = (DefElem *) lfirst(lc);
792
793                 if (strcmp(def->defname, "format") == 0)
794                 {
795                         char       *format = defGetString(def);
796
797                         if (strcmp(format, "binary") == 0)
798                                 return false;
799                         break;
800                 }
801         }
802
803         /* Collect all the attributes needed for joins or final output. */
804         pull_varattnos((Node *) baserel->reltargetlist, baserel->relid,
805                                    &attrs_used);
806
807         /* Add all the attributes used by restriction clauses. */
808         foreach(lc, baserel->baserestrictinfo)
809         {
810                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
811
812                 pull_varattnos((Node *) rinfo->clause, baserel->relid,
813                                            &attrs_used);
814         }
815
816         /* Convert attribute numbers to column names. */
817         rel = heap_open(foreigntableid, AccessShareLock);
818         tupleDesc = RelationGetDescr(rel);
819
820         while ((attnum = bms_first_member(attrs_used)) >= 0)
821         {
822                 /* Adjust for system attributes. */
823                 attnum += FirstLowInvalidHeapAttributeNumber;
824
825                 if (attnum == 0)
826                 {
827                         has_wholerow = true;
828                         break;
829                 }
830
831                 /* Ignore system attributes. */
832                 if (attnum < 0)
833                         continue;
834
835                 /* Get user attributes. */
836                 if (attnum > 0)
837                 {
838                         Form_pg_attribute attr = tupleDesc->attrs[attnum - 1];
839                         char       *attname = NameStr(attr->attname);
840
841                         /* Skip dropped attributes (probably shouldn't see any here). */
842                         if (attr->attisdropped)
843                                 continue;
844                         *columns = lappend(*columns, makeString(pstrdup(attname)));
845                 }
846         }
847
848         /* Count non-dropped user attributes while we have the tupdesc. */
849         numattrs = 0;
850         for (i = 0; i < tupleDesc->natts; i++)
851         {
852                 Form_pg_attribute attr = tupleDesc->attrs[i];
853
854                 if (attr->attisdropped)
855                         continue;
856                 numattrs++;
857         }
858
859         heap_close(rel, AccessShareLock);
860
861         /* If there's a whole-row reference, fail: we need all the columns. */
862         if (has_wholerow)
863         {
864                 *columns = NIL;
865                 return false;
866         }
867
868         /* If all the user attributes are needed, fail. */
869         if (numattrs == list_length(*columns))
870         {
871                 *columns = NIL;
872                 return false;
873         }
874
875         return true;
876 }
877
878 /*
879  * Estimate size of a foreign table.
880  *
881  * The main result is returned in baserel->rows.  We also set
882  * fdw_private->pages and fdw_private->ntuples for later use in the cost
883  * calculation.
884  */
885 static void
886 estimate_size(PlannerInfo *root, RelOptInfo *baserel,
887                           FileFdwPlanState *fdw_private)
888 {
889         struct stat stat_buf;
890         BlockNumber pages;
891         double          ntuples;
892         double          nrows;
893
894         /*
895          * Get size of the file.  It might not be there at plan time, though, in
896          * which case we have to use a default estimate.
897          */
898         if (stat(fdw_private->filename, &stat_buf) < 0)
899                 stat_buf.st_size = 10 * BLCKSZ;
900
901         /*
902          * Convert size to pages for use in I/O cost estimate later.
903          */
904         pages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
905         if (pages < 1)
906                 pages = 1;
907         fdw_private->pages = pages;
908
909         /*
910          * Estimate the number of tuples in the file.
911          */
912         if (baserel->pages > 0)
913         {
914                 /*
915                  * We have # of pages and # of tuples from pg_class (that is, from a
916                  * previous ANALYZE), so compute a tuples-per-page estimate and scale
917                  * that by the current file size.
918                  */
919                 double          density;
920
921                 density = baserel->tuples / (double) baserel->pages;
922                 ntuples = clamp_row_est(density * (double) pages);
923         }
924         else
925         {
926                 /*
927                  * Otherwise we have to fake it.  We back into this estimate using the
928                  * planner's idea of the relation width; which is bogus if not all
929                  * columns are being read, not to mention that the text representation
930                  * of a row probably isn't the same size as its internal
931                  * representation.  Possibly we could do something better, but the
932                  * real answer to anyone who complains is "ANALYZE" ...
933                  */
934                 int                     tuple_width;
935
936                 tuple_width = MAXALIGN(baserel->width) +
937                         MAXALIGN(SizeofHeapTupleHeader);
938                 ntuples = clamp_row_est((double) stat_buf.st_size /
939                                                                 (double) tuple_width);
940         }
941         fdw_private->ntuples = ntuples;
942
943         /*
944          * Now estimate the number of rows returned by the scan after applying the
945          * baserestrictinfo quals.
946          */
947         nrows = ntuples *
948                 clauselist_selectivity(root,
949                                                            baserel->baserestrictinfo,
950                                                            0,
951                                                            JOIN_INNER,
952                                                            NULL);
953
954         nrows = clamp_row_est(nrows);
955
956         /* Save the output-rows estimate for the planner */
957         baserel->rows = nrows;
958 }
959
960 /*
961  * Estimate costs of scanning a foreign table.
962  *
963  * Results are returned in *startup_cost and *total_cost.
964  */
965 static void
966 estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
967                            FileFdwPlanState *fdw_private,
968                            Cost *startup_cost, Cost *total_cost)
969 {
970         BlockNumber pages = fdw_private->pages;
971         double          ntuples = fdw_private->ntuples;
972         Cost            run_cost = 0;
973         Cost            cpu_per_tuple;
974
975         /*
976          * We estimate costs almost the same way as cost_seqscan(), thus assuming
977          * that I/O costs are equivalent to a regular table file of the same size.
978          * However, we take per-tuple CPU costs as 10x of a seqscan, to account
979          * for the cost of parsing records.
980          */
981         run_cost += seq_page_cost * pages;
982
983         *startup_cost = baserel->baserestrictcost.startup;
984         cpu_per_tuple = cpu_tuple_cost * 10 + baserel->baserestrictcost.per_tuple;
985         run_cost += cpu_per_tuple * ntuples;
986         *total_cost = *startup_cost + run_cost;
987 }
988
989 /*
990  * file_acquire_sample_rows -- acquire a random sample of rows from the table
991  *
992  * Selected rows are returned in the caller-allocated array rows[],
993  * which must have at least targrows entries.
994  * The actual number of rows selected is returned as the function result.
995  * We also count the total number of rows in the file and return it into
996  * *totalrows.  Note that *totaldeadrows is always set to 0.
997  *
998  * Note that the returned list of rows is not always in order by physical
999  * position in the file.  Therefore, correlation estimates derived later
1000  * may be meaningless, but it's OK because we don't use the estimates
1001  * currently (the planner only pays attention to correlation for indexscans).
1002  */
1003 static int
1004 file_acquire_sample_rows(Relation onerel, int elevel,
1005                                                  HeapTuple *rows, int targrows,
1006                                                  double *totalrows, double *totaldeadrows)
1007 {
1008         int                     numrows = 0;
1009         double          rowstoskip = -1;        /* -1 means not set yet */
1010         ReservoirStateData rstate;
1011         TupleDesc       tupDesc;
1012         Datum      *values;
1013         bool       *nulls;
1014         bool            found;
1015         char       *filename;
1016         List       *options;
1017         CopyState       cstate;
1018         ErrorContextCallback errcallback;
1019         MemoryContext oldcontext = CurrentMemoryContext;
1020         MemoryContext tupcontext;
1021
1022         Assert(onerel);
1023         Assert(targrows > 0);
1024
1025         tupDesc = RelationGetDescr(onerel);
1026         values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
1027         nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
1028
1029         /* Fetch options of foreign table */
1030         fileGetOptions(RelationGetRelid(onerel), &filename, &options);
1031
1032         /*
1033          * Create CopyState from FDW options.
1034          */
1035         cstate = BeginCopyFrom(onerel, filename, false, NIL, options);
1036
1037         /*
1038          * Use per-tuple memory context to prevent leak of memory used to read
1039          * rows from the file with Copy routines.
1040          */
1041         tupcontext = AllocSetContextCreate(CurrentMemoryContext,
1042                                                                            "file_fdw temporary context",
1043                                                                            ALLOCSET_DEFAULT_MINSIZE,
1044                                                                            ALLOCSET_DEFAULT_INITSIZE,
1045                                                                            ALLOCSET_DEFAULT_MAXSIZE);
1046
1047         /* Prepare for sampling rows */
1048         reservoir_init_selection_state(&rstate, targrows);
1049
1050         /* Set up callback to identify error line number. */
1051         errcallback.callback = CopyFromErrorCallback;
1052         errcallback.arg = (void *) cstate;
1053         errcallback.previous = error_context_stack;
1054         error_context_stack = &errcallback;
1055
1056         *totalrows = 0;
1057         *totaldeadrows = 0;
1058         for (;;)
1059         {
1060                 /* Check for user-requested abort or sleep */
1061                 vacuum_delay_point();
1062
1063                 /* Fetch next row */
1064                 MemoryContextReset(tupcontext);
1065                 MemoryContextSwitchTo(tupcontext);
1066
1067                 found = NextCopyFrom(cstate, NULL, values, nulls, NULL);
1068
1069                 MemoryContextSwitchTo(oldcontext);
1070
1071                 if (!found)
1072                         break;
1073
1074                 /*
1075                  * The first targrows sample rows are simply copied into the
1076                  * reservoir.  Then we start replacing tuples in the sample until we
1077                  * reach the end of the relation. This algorithm is from Jeff Vitter's
1078                  * paper (see more info in commands/analyze.c).
1079                  */
1080                 if (numrows < targrows)
1081                 {
1082                         rows[numrows++] = heap_form_tuple(tupDesc, values, nulls);
1083                 }
1084                 else
1085                 {
1086                         /*
1087                          * t in Vitter's paper is the number of records already processed.
1088                          * If we need to compute a new S value, we must use the
1089                          * not-yet-incremented value of totalrows as t.
1090                          */
1091                         if (rowstoskip < 0)
1092                                 rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows);
1093
1094                         if (rowstoskip <= 0)
1095                         {
1096                                 /*
1097                                  * Found a suitable tuple, so save it, replacing one old tuple
1098                                  * at random
1099                                  */
1100                                 int                     k = (int) (targrows * sampler_random_fract(rstate.randstate));
1101
1102                                 Assert(k >= 0 && k < targrows);
1103                                 heap_freetuple(rows[k]);
1104                                 rows[k] = heap_form_tuple(tupDesc, values, nulls);
1105                         }
1106
1107                         rowstoskip -= 1;
1108                 }
1109
1110                 *totalrows += 1;
1111         }
1112
1113         /* Remove error callback. */
1114         error_context_stack = errcallback.previous;
1115
1116         /* Clean up. */
1117         MemoryContextDelete(tupcontext);
1118
1119         EndCopyFrom(cstate);
1120
1121         pfree(values);
1122         pfree(nulls);
1123
1124         /*
1125          * Emit some interesting relation info
1126          */
1127         ereport(elevel,
1128                         (errmsg("\"%s\": file contains %.0f rows; "
1129                                         "%d rows in sample",
1130                                         RelationGetRelationName(onerel),
1131                                         *totalrows, numrows)));
1132
1133         return numrows;
1134 }