1 /*-------------------------------------------------------------------------
4 * foreign-data wrapper for server-side flat files.
6 * Copyright (c) 2010-2016, PostgreSQL Global Development Group
9 * contrib/file_fdw/file_fdw.c
11 *-------------------------------------------------------------------------
18 #include "access/htup_details.h"
19 #include "access/reloptions.h"
20 #include "access/sysattr.h"
21 #include "catalog/pg_foreign_table.h"
22 #include "commands/copy.h"
23 #include "commands/defrem.h"
24 #include "commands/explain.h"
25 #include "commands/vacuum.h"
26 #include "foreign/fdwapi.h"
27 #include "foreign/foreign.h"
28 #include "miscadmin.h"
29 #include "nodes/makefuncs.h"
30 #include "optimizer/cost.h"
31 #include "optimizer/pathnode.h"
32 #include "optimizer/planmain.h"
33 #include "optimizer/restrictinfo.h"
34 #include "optimizer/var.h"
35 #include "utils/memutils.h"
36 #include "utils/rel.h"
37 #include "utils/sampling.h"
42 * Describes the valid options for objects that use this wrapper.
/* Compared against the catalog OID passed to the validator and to is_valid_option(). */
47 Oid optcontext; /* Oid of catalog in which option may appear */
51 * Valid options for file_fdw.
52 * These options are based on the options for the COPY FROM command.
53 * But note that force_not_null and force_null are handled as boolean options
54 * attached to a column, not as table options.
56 * Note: If you are adding a new option for user mappings, you need to modify
57 * fileGetOptions(), which currently doesn't bother to look at user mappings.
/*
 * Table of recognized option names, each paired with the catalog OID of the
 * kind of object it may be attached to.  Both is_valid_option() and the
 * validator's hint builder walk this table until they reach an entry whose
 * optname is NULL.
 */
59 static const struct FileFdwOption valid_options[] = {
61 {"filename", ForeignTableRelationId},
64 /* oids option is not supported */
65 {"format", ForeignTableRelationId},
66 {"header", ForeignTableRelationId},
67 {"delimiter", ForeignTableRelationId},
68 {"quote", ForeignTableRelationId},
69 {"escape", ForeignTableRelationId},
70 {"null", ForeignTableRelationId},
71 {"encoding", ForeignTableRelationId},
/* The remaining entries use the AttributeRelationId context (per-column options). */
72 {"force_not_null", AttributeRelationId},
73 {"force_null", AttributeRelationId},
76 * force_quote is not supported by file_fdw because it's for COPY TO.
84 * FDW-specific information for RelOptInfo.fdw_private.
/*
 * Allocated in fileGetForeignRelSize() and stored in baserel->fdw_private.
 * filename/options are filled by fileGetOptions(); pages/ntuples are filled
 * by estimate_size() and read back by estimate_costs().
 */
86 typedef struct FileFdwPlanState
88 char *filename; /* file to read */
89 List *options; /* merged COPY options, excluding filename */
90 BlockNumber pages; /* estimate of file's physical size */
91 double ntuples; /* estimate of number of rows in file */
95 * FDW-specific information for ForeignScanState.fdw_state.
/*
 * Built by fileBeginForeignScan() and stored in node->fdw_state; the iterate,
 * rescan and end callbacks fetch it back from there.
 */
97 typedef struct FileFdwExecutionState
99 char *filename; /* file to read */
100 List *options; /* merged COPY options, excluding filename */
101 CopyState cstate; /* state of reading file */
102 } FileFdwExecutionState;
/* Function-info macros for the two PG_FUNCTION_ARGS entry points defined below: the handler and the validator. */
107 PG_FUNCTION_INFO_V1(file_fdw_handler);
108 PG_FUNCTION_INFO_V1(file_fdw_validator);
111 * FDW callback routines
/* Callbacks that file_fdw_handler() installs into the FdwRoutine node. */
113 static void fileGetForeignRelSize(PlannerInfo *root,
116 static void fileGetForeignPaths(PlannerInfo *root,
119 static ForeignScan *fileGetForeignPlan(PlannerInfo *root,
122 ForeignPath *best_path,
126 static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es);
127 static void fileBeginForeignScan(ForeignScanState *node, int eflags);
128 static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
129 static void fileReScanForeignScan(ForeignScanState *node);
130 static void fileEndForeignScan(ForeignScanState *node);
131 static bool fileAnalyzeForeignTable(Relation relation,
132 AcquireSampleRowsFunc *func,
133 BlockNumber *totalpages);
134 static bool fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel,
/* File-local helpers used by the callbacks and the validator. */
140 static bool is_valid_option(const char *option, Oid context);
141 static void fileGetOptions(Oid foreigntableid,
142 char **filename, List **other_options);
143 static List *get_file_fdw_attribute_options(Oid relid);
144 static bool check_selective_binary_conversion(RelOptInfo *baserel,
147 static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
148 FileFdwPlanState *fdw_private);
149 static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
150 FileFdwPlanState *fdw_private,
151 Cost *startup_cost, Cost *total_cost);
152 static int file_acquire_sample_rows(Relation onerel, int elevel,
153 HeapTuple *rows, int targrows,
154 double *totalrows, double *totaldeadrows);
158 * Foreign-data wrapper handler function: return a struct with pointers
159 * to my callback routines.
/* Builds an FdwRoutine node, fills in every callback this wrapper provides, and returns it. */
162 file_fdw_handler(PG_FUNCTION_ARGS)
164 FdwRoutine *fdwroutine = makeNode(FdwRoutine);
/* Planner callbacks */
166 fdwroutine->GetForeignRelSize = fileGetForeignRelSize;
167 fdwroutine->GetForeignPaths = fileGetForeignPaths;
168 fdwroutine->GetForeignPlan = fileGetForeignPlan;
/* EXPLAIN and scan-execution callbacks */
169 fdwroutine->ExplainForeignScan = fileExplainForeignScan;
170 fdwroutine->BeginForeignScan = fileBeginForeignScan;
171 fdwroutine->IterateForeignScan = fileIterateForeignScan;
172 fdwroutine->ReScanForeignScan = fileReScanForeignScan;
173 fdwroutine->EndForeignScan = fileEndForeignScan;
/* ANALYZE support and parallel-safety check */
174 fdwroutine->AnalyzeForeignTable = fileAnalyzeForeignTable;
175 fdwroutine->IsForeignScanParallelSafe = fileIsForeignScanParallelSafe;
177 PG_RETURN_POINTER(fdwroutine);
181 * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER,
182 * USER MAPPING or FOREIGN TABLE that uses file_fdw.
184 * Raise an ERROR if the option or its value is considered invalid.
/*
 * Argument 0 carries the options, decoded below into a list of DefElem nodes;
 * argument 1 is the OID of the catalog the options are attached to.
 */
187 file_fdw_validator(PG_FUNCTION_ARGS)
189 List *options_list = untransformRelOptions(PG_GETARG_DATUM(0));
190 Oid catalog = PG_GETARG_OID(1);
/* filename / force_not_null / force_null remember the first occurrence so duplicates can be rejected */
191 char *filename = NULL;
192 DefElem *force_not_null = NULL;
193 DefElem *force_null = NULL;
/* Options not handled specially here; validated in bulk by ProcessCopyOptions() */
194 List *other_options = NIL;
198 * Only superusers are allowed to set options of a file_fdw foreign table.
199 * This is because the filename is one of those options, and we don't want
200 * non-superusers to be able to determine which file gets read.
202 * Putting this sort of permissions check in a validator is a bit of a
203 * crock, but there doesn't seem to be any other place that can enforce
204 * the check more cleanly.
206 * Note that the valid_options[] array disallows setting filename at any
207 * options level other than foreign table --- otherwise there'd still be a
210 if (catalog == ForeignTableRelationId && !superuser())
212 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
213 errmsg("only superuser can change options of a file_fdw foreign table")));
216 * Check that only options supported by file_fdw, and allowed for the
217 * current object type, are given.
219 foreach(cell, options_list)
221 DefElem *def = (DefElem *) lfirst(cell);
223 if (!is_valid_option(def->defname, catalog))
225 const struct FileFdwOption *opt;
229 * Unknown option specified, complain about it. Provide a hint
230 * with list of valid options for the object.
232 initStringInfo(&buf);
/* Collect, comma-separated, the names whose context matches this catalog */
233 for (opt = valid_options; opt->optname; opt++)
235 if (catalog == opt->optcontext)
236 appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "",
241 (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
242 errmsg("invalid option \"%s\"", def->defname),
244 ? errhint("Valid options in this context are: %s",
246 : errhint("There are no valid options in this context.")));
250 * Separate out filename and column-specific options, since
251 * ProcessCopyOptions won't accept them.
254 if (strcmp(def->defname, "filename") == 0)
258 (errcode(ERRCODE_SYNTAX_ERROR),
259 errmsg("conflicting or redundant options")));
260 filename = defGetString(def);
264 * force_not_null is a boolean option; after validation we can discard
265 * it - it will be retrieved later in get_file_fdw_attribute_options()
267 else if (strcmp(def->defname, "force_not_null") == 0)
271 (errcode(ERRCODE_SYNTAX_ERROR),
272 errmsg("conflicting or redundant options"),
273 errhint("option \"force_not_null\" supplied more than once for a column")));
274 force_not_null = def;
275 /* Don't care what the value is, as long as it's a legal boolean */
276 (void) defGetBoolean(def);
278 /* See comments for force_not_null above */
279 else if (strcmp(def->defname, "force_null") == 0)
283 (errcode(ERRCODE_SYNTAX_ERROR),
284 errmsg("conflicting or redundant options"),
285 errhint("option \"force_null\" supplied more than once for a column")));
287 (void) defGetBoolean(def);
/* Every other option is queued for the COPY option checker below */
290 other_options = lappend(other_options, def);
294 * Now apply the core COPY code's validation logic for more checks.
296 ProcessCopyOptions(NULL, true, other_options);
299 * Filename option is required for file_fdw foreign tables.
301 if (catalog == ForeignTableRelationId && filename == NULL)
303 (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
304 errmsg("filename is required for file_fdw foreign tables")));
310 * Check if the provided option is one of the valid options.
311 * context is the Oid of the catalog holding the object the option is for.
/* Looks up (option, context) in valid_options[]. */
314 is_valid_option(const char *option, Oid context)
316 const struct FileFdwOption *opt;
/* Linear scan; the entry with a NULL optname terminates the table */
318 for (opt = valid_options; opt->optname; opt++)
/* A match requires both the catalog context and the option name to agree */
320 if (context == opt->optcontext && strcmp(opt->optname, option) == 0)
327 * Fetch the options for a file_fdw foreign table.
329 * We have to separate out "filename" from the other options because
330 * it must not appear in the options list passed to the core COPY code.
/*
 * Outputs: *filename receives the value of the "filename" option and
 * *other_options receives the merged option list with that entry removed.
 * Raises an ERROR via elog if *filename is still NULL after the scan.
 */
333 fileGetOptions(Oid foreigntableid,
334 char **filename, List **other_options)
337 ForeignServer *server;
338 ForeignDataWrapper *wrapper;
344 * Extract options from FDW objects. We ignore user mappings because
345 * file_fdw doesn't have any options that can be specified there.
347 * (XXX Actually, given the current contents of valid_options[], there's
348 * no point in examining anything except the foreign table's own options.
351 table = GetForeignTable(foreigntableid);
352 server = GetForeignServer(table->serverid);
353 wrapper = GetForeignDataWrapper(server->fdwid);
/* Merge order: wrapper, then server, then table, then per-column options */
356 options = list_concat(options, wrapper->options);
357 options = list_concat(options, server->options);
358 options = list_concat(options, table->options);
359 options = list_concat(options, get_file_fdw_attribute_options(foreigntableid));
362 * Separate out the filename.
368 DefElem *def = (DefElem *) lfirst(lc);
370 if (strcmp(def->defname, "filename") == 0)
372 *filename = defGetString(def);
/* Unlink the filename cell so it is not part of the list handed back */
373 options = list_delete_cell(options, lc, prev);
380 * The validator should have checked that a filename was included in the
381 * options, but check again, just in case.
383 if (*filename == NULL)
384 elog(ERROR, "filename is required for file_fdw foreign tables");
386 *other_options = options;
390 * Retrieve per-column generic options from pg_attribute and construct a list
391 * of DefElems representing them.
393 * At the moment we only have "force_not_null" and "force_null",
394 * which should each be combined into a single DefElem listing all such
395 * columns, since that's what COPY expects.
/*
 * Scans every attribute of relation relid and gathers the names of columns
 * whose "force_not_null" / "force_null" column option is true, then appends
 * at most one DefElem per option name to the options list.
 */
398 get_file_fdw_attribute_options(Oid relid)
/* Names of columns with force_not_null (fnncolumns) and force_null (fncolumns) enabled */
404 List *fnncolumns = NIL;
405 List *fncolumns = NIL;
/* Open with AccessShareLock to read the tuple descriptor; closed again after the loop */
409 rel = heap_open(relid, AccessShareLock);
410 tupleDesc = RelationGetDescr(rel);
411 natts = tupleDesc->natts;
413 /* Retrieve FDW options for all user-defined attributes. */
/* attnum is 1-based, hence the attnum - 1 index into attrs[] */
414 for (attnum = 1; attnum <= natts; attnum++)
416 Form_pg_attribute attr = tupleDesc->attrs[attnum - 1];
420 /* Skip dropped attributes. */
421 if (attr->attisdropped)
424 options = GetForeignColumnOptions(relid, attnum);
427 DefElem *def = (DefElem *) lfirst(lc);
429 if (strcmp(def->defname, "force_not_null") == 0)
/* Only columns where the option evaluates to true are recorded */
431 if (defGetBoolean(def))
/* Store a private copy of the column name rather than a pointer into the descriptor */
433 char *attname = pstrdup(NameStr(attr->attname));
435 fnncolumns = lappend(fnncolumns, makeString(attname));
438 else if (strcmp(def->defname, "force_null") == 0)
440 if (defGetBoolean(def))
442 char *attname = pstrdup(NameStr(attr->attname));
444 fncolumns = lappend(fncolumns, makeString(attname));
447 /* maybe in future handle other options here */
451 heap_close(rel, AccessShareLock);
454 * Return DefElem only when some column(s) have force_not_null /
455 * force_null options set
457 if (fnncolumns != NIL)
458 options = lappend(options, makeDefElem("force_not_null", (Node *) fnncolumns));
460 if (fncolumns != NIL)
461 options = lappend(options, makeDefElem("force_null", (Node *) fncolumns));
467 * fileGetForeignRelSize
468 * Obtain relation size estimates for a foreign table
/* Allocates the planner-private state, loads the table's options into it, and estimates the relation size. */
471 fileGetForeignRelSize(PlannerInfo *root,
475 FileFdwPlanState *fdw_private;
478 * Fetch options. We only need filename at this point, but we might as
479 * well get everything and not need to re-fetch it later in planning.
481 fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState));
482 fileGetOptions(foreigntableid,
483 &fdw_private->filename, &fdw_private->options);
/* Attach to the RelOptInfo so fileGetForeignPaths() can read it back */
484 baserel->fdw_private = (void *) fdw_private;
486 /* Estimate relation size */
487 estimate_size(root, baserel, fdw_private);
491 * fileGetForeignPaths
492 * Create possible access paths for a scan on the foreign table
494 * Currently we don't support any push-down feature, so there is only one
495 * possible access path, which simply returns all records in the order in
/* Adds a single ForeignPath for baserel, costed by estimate_costs(). */
499 fileGetForeignPaths(PlannerInfo *root,
/* State stored on baserel by fileGetForeignRelSize() */
503 FileFdwPlanState *fdw_private = (FileFdwPlanState *) baserel->fdw_private;
/* Stays NIL unless selective binary conversion is chosen below */
507 List *coptions = NIL;
509 /* Decide whether to selectively perform binary conversion */
510 if (check_selective_binary_conversion(baserel,
513 coptions = list_make1(makeDefElem("convert_selectively",
/* Fill in startup_cost and total_cost for the path created below */
517 estimate_costs(root, baserel, fdw_private,
518 &startup_cost, &total_cost);
521 * Create a ForeignPath node and add it as only possible path. We use the
522 * fdw_private list of the path to carry the convert_selectively option;
523 * it will be propagated into the fdw_private list of the Plan node.
525 add_path(baserel, (Path *)
526 create_foreignscan_path(root, baserel,
527 NULL, /* default pathtarget */
531 NIL, /* no pathkeys */
532 NULL, /* no outer rel either */
533 NULL, /* no extra plan */
537 * If data file was sorted, and we knew it somehow, we could insert
538 * appropriate pathkeys into the ForeignPath node to tell the planner
545 * Create a ForeignScan plan node for scanning the foreign table
/* Builds the ForeignScan node from the chosen path via make_foreignscan(). */
548 fileGetForeignPlan(PlannerInfo *root,
551 ForeignPath *best_path,
/* Taken from baserel->relid */
556 Index scan_relid = baserel->relid;
559 * We have no native ability to evaluate restriction clauses, so we just
560 * put all the scan_clauses into the plan node's qual list for the
561 * executor to check. So all we have to do here is strip RestrictInfo
562 * nodes from the clauses and ignore pseudoconstants (which will be
563 * handled elsewhere).
565 scan_clauses = extract_actual_clauses(scan_clauses, false);
567 /* Create the ForeignScan node */
568 return make_foreignscan(tlist,
571 NIL, /* no expressions to evaluate */
/* The path's private list is passed on unchanged */
572 best_path->fdw_private,
573 NIL, /* no custom tlist */
574 NIL, /* no remote quals */
579 * fileExplainForeignScan
580 * Produce extra output for EXPLAIN
/* Emits the "Foreign File" property and, when stat() succeeds, "Foreign File Size". */
583 fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
588 /* Fetch options --- we only need filename at this point */
589 fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
590 &filename, &options);
592 ExplainPropertyText("Foreign File", filename, es);
594 /* Suppress file size if we're not showing cost details */
597 struct stat stat_buf;
/* The size property is emitted only when stat() succeeds */
599 if (stat(filename, &stat_buf) == 0)
/* NOTE(review): st_size is cast to long; if long is narrower than st_size's type, large files may be misreported - confirm */
600 ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size,
606 * fileBeginForeignScan
607 * Initiate access to the file by creating CopyState
/* Opens the file through BeginCopyFrom() and stores the resulting state in node->fdw_state. */
610 fileBeginForeignScan(ForeignScanState *node, int eflags)
612 ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
616 FileFdwExecutionState *festate;
619 * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL.
621 if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
624 /* Fetch options of foreign table */
625 fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
626 &filename, &options);
628 /* Add any options from the plan (currently only convert_selectively) */
629 options = list_concat(options, plan->fdw_private);
632 * Create CopyState from FDW options. We always acquire all columns, so
633 * as to match the expected ScanTupleSlot signature.
635 cstate = BeginCopyFrom(node->ss.ss_currentRelation,
642 * Save state in node->fdw_state. We must save enough information to call
643 * BeginCopyFrom() again.
645 festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
646 festate->filename = filename;
647 festate->options = options;
648 festate->cstate = cstate;
/* Picked up again by the iterate, rescan and end callbacks */
650 node->fdw_state = (void *) festate;
654 * fileIterateForeignScan
655 * Read next record from the data file and store it into the
656 * ScanTupleSlot as a virtual tuple
/* Fetches one row via NextCopyFrom() into the scan slot's values/isnull arrays. */
658 static TupleTableSlot *
659 fileIterateForeignScan(ForeignScanState *node)
661 FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
662 TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
664 ErrorContextCallback errcallback;
666 /* Set up callback to identify error line number. */
/* Pushed onto error_context_stack here and popped again before returning */
667 errcallback.callback = CopyFromErrorCallback;
668 errcallback.arg = (void *) festate->cstate;
669 errcallback.previous = error_context_stack;
670 error_context_stack = &errcallback;
673 * The protocol for loading a virtual tuple into a slot is first
674 * ExecClearTuple, then fill the values/isnull arrays, then
675 * ExecStoreVirtualTuple. If we don't find another row in the file, we
676 * just skip the last step, leaving the slot empty as required.
678 * We can pass ExprContext = NULL because we read all columns from the
679 * file, so no need to evaluate default expressions.
681 * We can also pass tupleOid = NULL because we don't allow oids for
684 ExecClearTuple(slot);
/* The row is read directly into the slot's own arrays */
685 found = NextCopyFrom(festate->cstate, NULL,
686 slot->tts_values, slot->tts_isnull,
689 ExecStoreVirtualTuple(slot);
691 /* Remove error callback. */
692 error_context_stack = errcallback.previous;
698 * fileReScanForeignScan
699 * Rescan table, possibly with new parameters
/* Restarts the scan by closing the current CopyState and opening a new one on the same relation. */
702 fileReScanForeignScan(ForeignScanState *node)
704 FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
706 EndCopyFrom(festate->cstate);
/* The replacement CopyState overwrites the old pointer in festate */
708 festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
717 * Finish scanning foreign table and dispose objects used for this scan
/* Closes the CopyState held in the scan's execution state. */
720 fileEndForeignScan(ForeignScanState *node)
/* node->fdw_state is left NULL by fileBeginForeignScan() in the EXPLAIN-only case */
722 FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
724 /* if festate is NULL, we are in EXPLAIN; nothing to do */
726 EndCopyFrom(festate->cstate);
730 * fileAnalyzeForeignTable
731 * Test whether analyzing this foreign table is supported
/*
 * Outputs: *totalpages receives the file size converted to BLCKSZ-sized
 * pages (rounded up); *func receives file_acquire_sample_rows.
 * Raises an error if stat() on the file fails.
 */
734 fileAnalyzeForeignTable(Relation relation,
735 AcquireSampleRowsFunc *func,
736 BlockNumber *totalpages)
740 struct stat stat_buf;
742 /* Fetch options of foreign table */
743 fileGetOptions(RelationGetRelid(relation), &filename, &options);
746 * Get size of the file. (XXX if we fail here, would it be better to just
747 * return false to skip analyzing the table?)
749 if (stat(filename, &stat_buf) < 0)
751 (errcode_for_file_access(),
752 errmsg("could not stat file \"%s\": %m",
756 * Convert size to pages. Must return at least 1 so that we can tell
757 * later on that pg_class.relpages is not default.
/* Adding BLCKSZ - 1 before dividing rounds a partial page up */
759 *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
763 *func = file_acquire_sample_rows;
769 * fileIsForeignScanParallelSafe
770 * Reading a file in a parallel worker should work just the same as
771 * reading it in the leader, so mark scans safe.
/* Installed as FdwRoutine->IsForeignScanParallelSafe by file_fdw_handler(). */
774 fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel,
781 * check_selective_binary_conversion
783 * Check to see if it's useful to convert only a subset of the file's columns
784 * to binary. If so, construct a list of the column names to be converted,
785 * return that at *columns, and return TRUE. (Note that it's possible to
786 * determine that no columns need be converted, for instance with a COUNT(*)
787 * query. So we can't use returning a NIL list to indicate failure.)
/*
 * Collects the attribute numbers referenced by the relation's target list and
 * restriction clauses, and converts the user attributes among them to a list
 * of column names returned through *columns.
 */
790 check_selective_binary_conversion(RelOptInfo *baserel,
/* Filled by the pull_varattnos() calls below */
799 Bitmapset *attrs_used = NULL;
800 bool has_wholerow = false;
804 *columns = NIL; /* default result */
807 * Check format of the file. If binary format, this is irrelevant.
/* Only the foreign table's own options are examined for "format" */
809 table = GetForeignTable(foreigntableid);
810 foreach(lc, table->options)
812 DefElem *def = (DefElem *) lfirst(lc);
814 if (strcmp(def->defname, "format") == 0)
816 char *format = defGetString(def);
818 if (strcmp(format, "binary") == 0)
824 /* Collect all the attributes needed for joins or final output. */
825 pull_varattnos((Node *) baserel->reltarget->exprs, baserel->relid,
828 /* Add all the attributes used by restriction clauses. */
829 foreach(lc, baserel->baserestrictinfo)
831 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
833 pull_varattnos((Node *) rinfo->clause, baserel->relid,
837 /* Convert attribute numbers to column names. */
838 rel = heap_open(foreigntableid, AccessShareLock);
839 tupleDesc = RelationGetDescr(rel);
/* Loop ends when bms_first_member() returns a negative value */
841 while ((attnum = bms_first_member(attrs_used)) >= 0)
843 /* Adjust for system attributes. */
844 attnum += FirstLowInvalidHeapAttributeNumber;
852 /* Ignore system attributes. */
856 /* Get user attributes. */
859 Form_pg_attribute attr = tupleDesc->attrs[attnum - 1];
860 char *attname = NameStr(attr->attname);
862 /* Skip dropped attributes (probably shouldn't see any here). */
863 if (attr->attisdropped)
/* The name is copied with pstrdup before being put in the list */
865 *columns = lappend(*columns, makeString(pstrdup(attname)));
869 /* Count non-dropped user attributes while we have the tupdesc. */
871 for (i = 0; i < tupleDesc->natts; i++)
873 Form_pg_attribute attr = tupleDesc->attrs[i];
875 if (attr->attisdropped)
880 heap_close(rel, AccessShareLock);
882 /* If there's a whole-row reference, fail: we need all the columns. */
889 /* If all the user attributes are needed, fail. */
890 if (numattrs == list_length(*columns))
900 * Estimate size of a foreign table.
902 * The main result is returned in baserel->rows. We also set
903 * fdw_private->pages and fdw_private->ntuples for later use in the cost
/* Writes fdw_private->pages, fdw_private->ntuples and baserel->rows. */
907 estimate_size(PlannerInfo *root, RelOptInfo *baserel,
908 FileFdwPlanState *fdw_private)
910 struct stat stat_buf;
916 * Get size of the file. It might not be there at plan time, though, in
917 * which case we have to use a default estimate.
/* Fallback when stat() fails: assume a size of 10 blocks */
919 if (stat(fdw_private->filename, &stat_buf) < 0)
920 stat_buf.st_size = 10 * BLCKSZ;
923 * Convert size to pages for use in I/O cost estimate later.
/* Adding BLCKSZ - 1 before dividing rounds a partial page up */
925 pages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
928 fdw_private->pages = pages;
931 * Estimate the number of tuples in the file.
933 if (baserel->pages > 0)
936 * We have # of pages and # of tuples from pg_class (that is, from a
937 * previous ANALYZE), so compute a tuples-per-page estimate and scale
938 * that by the current file size.
942 density = baserel->tuples / (double) baserel->pages;
943 ntuples = clamp_row_est(density * (double) pages);
948 * Otherwise we have to fake it. We back into this estimate using the
949 * planner's idea of the relation width; which is bogus if not all
950 * columns are being read, not to mention that the text representation
951 * of a row probably isn't the same size as its internal
952 * representation. Possibly we could do something better, but the
953 * real answer to anyone who complains is "ANALYZE" ...
/* Assumed per-row footprint: aligned data width plus aligned tuple header size */
957 tuple_width = MAXALIGN(baserel->reltarget->width) +
958 MAXALIGN(SizeofHeapTupleHeader);
959 ntuples = clamp_row_est((double) stat_buf.st_size /
960 (double) tuple_width);
/* Saved for estimate_costs() */
962 fdw_private->ntuples = ntuples;
965 * Now estimate the number of rows returned by the scan after applying the
966 * baserestrictinfo quals.
969 clauselist_selectivity(root,
970 baserel->baserestrictinfo,
975 nrows = clamp_row_est(nrows);
977 /* Save the output-rows estimate for the planner */
978 baserel->rows = nrows;
982 * Estimate costs of scanning a foreign table.
984 * Results are returned in *startup_cost and *total_cost.
/* Computes *startup_cost and *total_cost from the page and tuple estimates saved in fdw_private. */
987 estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
988 FileFdwPlanState *fdw_private,
989 Cost *startup_cost, Cost *total_cost)
/* Both values were stored by estimate_size() */
991 BlockNumber pages = fdw_private->pages;
992 double ntuples = fdw_private->ntuples;
997 * We estimate costs almost the same way as cost_seqscan(), thus assuming
998 * that I/O costs are equivalent to a regular table file of the same size.
999 * However, we take per-tuple CPU costs as 10x of a seqscan, to account
1000 * for the cost of parsing records.
/* I/O component: seq_page_cost per page */
1002 run_cost += seq_page_cost * pages;
/* Startup cost is just the restriction clauses' startup cost */
1004 *startup_cost = baserel->baserestrictcost.startup;
/* CPU component: ten times cpu_tuple_cost plus the per-tuple qual cost, per row */
1005 cpu_per_tuple = cpu_tuple_cost * 10 + baserel->baserestrictcost.per_tuple;
1006 run_cost += cpu_per_tuple * ntuples;
1007 *total_cost = *startup_cost + run_cost;
1011 * file_acquire_sample_rows -- acquire a random sample of rows from the table
1013 * Selected rows are returned in the caller-allocated array rows[],
1014 * which must have at least targrows entries.
1015 * The actual number of rows selected is returned as the function result.
1016 * We also count the total number of rows in the file and return it into
1017 * *totalrows. Note that *totaldeadrows is always set to 0.
1019 * Note that the returned list of rows is not always in order by physical
1020 * position in the file. Therefore, correlation estimates derived later
1021 * may be meaningless, but it's OK because we don't use the estimates
1022 * currently (the planner only pays attention to correlation for indexscans).
/*
 * Reads the file through the COPY routines and keeps up to targrows tuples
 * in rows[], using the reservoir helpers once the array is full.
 */
1025 file_acquire_sample_rows(Relation onerel, int elevel,
1026 HeapTuple *rows, int targrows,
1027 double *totalrows, double *totaldeadrows)
1030 double rowstoskip = -1; /* -1 means not set yet */
1031 ReservoirStateData rstate;
1039 ErrorContextCallback errcallback;
/* Caller's context, switched back to after each row is read */
1040 MemoryContext oldcontext = CurrentMemoryContext;
1041 MemoryContext tupcontext;
1044 Assert(targrows > 0);
/* Scratch arrays sized to the relation's attribute count, filled by NextCopyFrom() */
1046 tupDesc = RelationGetDescr(onerel);
1047 values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
1048 nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
1050 /* Fetch options of foreign table */
1051 fileGetOptions(RelationGetRelid(onerel), &filename, &options);
1054 * Create CopyState from FDW options.
1056 cstate = BeginCopyFrom(onerel, filename, false, NIL, options);
1059 * Use per-tuple memory context to prevent leak of memory used to read
1060 * rows from the file with Copy routines.
1062 tupcontext = AllocSetContextCreate(CurrentMemoryContext,
1063 "file_fdw temporary context",
1064 ALLOCSET_DEFAULT_SIZES);
1066 /* Prepare for sampling rows */
1067 reservoir_init_selection_state(&rstate, targrows);
1069 /* Set up callback to identify error line number. */
1070 errcallback.callback = CopyFromErrorCallback;
1071 errcallback.arg = (void *) cstate;
1072 errcallback.previous = error_context_stack;
1073 error_context_stack = &errcallback;
1079 /* Check for user-requested abort or sleep */
1080 vacuum_delay_point();
1082 /* Fetch next row */
/* The temporary context is reset before each read */
1083 MemoryContextReset(tupcontext);
1084 MemoryContextSwitchTo(tupcontext);
1086 found = NextCopyFrom(cstate, NULL, values, nulls, NULL);
/* Back to the caller's context before any sample tuple is formed */
1088 MemoryContextSwitchTo(oldcontext);
1094 * The first targrows sample rows are simply copied into the
1095 * reservoir. Then we start replacing tuples in the sample until we
1096 * reach the end of the relation. This algorithm is from Jeff Vitter's
1097 * paper (see more info in commands/analyze.c).
1099 if (numrows < targrows)
1101 rows[numrows++] = heap_form_tuple(tupDesc, values, nulls);
1106 * t in Vitter's paper is the number of records already processed.
1107 * If we need to compute a new S value, we must use the
1108 * not-yet-incremented value of totalrows as t.
1111 rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows);
1113 if (rowstoskip <= 0)
1116 * Found a suitable tuple, so save it, replacing one old tuple
/* k is a slot index derived from the sampler's random fraction */
1119 int k = (int) (targrows * sampler_random_fract(rstate.randstate));
1121 Assert(k >= 0 && k < targrows);
/* Free the evicted tuple before its slot is overwritten */
1122 heap_freetuple(rows[k]);
1123 rows[k] = heap_form_tuple(tupDesc, values, nulls);
1132 /* Remove error callback. */
1133 error_context_stack = errcallback.previous;
1136 MemoryContextDelete(tupcontext);
1138 EndCopyFrom(cstate);
1144 * Emit some interesting relation info
1147 (errmsg("\"%s\": file contains %.0f rows; "
1148 "%d rows in sample",
1149 RelationGetRelationName(onerel),
1150 *totalrows, numrows)));