1 /*-------------------------------------------------------------------------
4 * foreign-data wrapper for server-side flat files.
6 * Copyright (c) 2010-2012, PostgreSQL Global Development Group
9 * contrib/file_fdw/file_fdw.c
11 *-------------------------------------------------------------------------
18 #include "access/reloptions.h"
19 #include "catalog/pg_foreign_table.h"
20 #include "commands/copy.h"
21 #include "commands/defrem.h"
22 #include "commands/explain.h"
23 #include "foreign/fdwapi.h"
24 #include "foreign/foreign.h"
25 #include "miscadmin.h"
26 #include "nodes/makefuncs.h"
27 #include "optimizer/cost.h"
28 #include "optimizer/pathnode.h"
29 #include "utils/rel.h"
34 * Describes the valid options for objects that use this wrapper.
39 Oid optcontext; /* Oid of catalog in which option may appear */
43 * Valid options for file_fdw.
44 * These options are based on the options for COPY FROM command.
45 * But note that force_not_null is handled as a boolean option attached to
46 * each column, not as a table option.
48 * Note: If you add a new option for user mappings, you need to modify
49 * fileGetOptions(), which currently doesn't bother to look at user mappings.
51 static struct FileFdwOption valid_options[] = {
/*
 * Each entry pairs an option name with the OID of the catalog in whose
 * context that option is accepted.  is_valid_option() and
 * file_fdw_validator() walk this array until they reach an entry whose
 * optname is NULL, so the list must end with such an entry.
 */
53 {"filename", ForeignTableRelationId},
56 /* oids option is not supported */
57 {"format", ForeignTableRelationId},
58 {"header", ForeignTableRelationId},
59 {"delimiter", ForeignTableRelationId},
60 {"quote", ForeignTableRelationId},
61 {"escape", ForeignTableRelationId},
62 {"null", ForeignTableRelationId},
63 {"encoding", ForeignTableRelationId},
/* The only entry whose context is AttributeRelationId rather than ForeignTableRelationId. */
64 {"force_not_null", AttributeRelationId},
67 * force_quote is not supported by file_fdw because it's for COPY TO.
75 * FDW-specific information for ForeignScanState.fdw_state.
/*
 * Filled in by fileBeginForeignScan() and stored in node->fdw_state.  The
 * other scan callbacks fetch it from there to reach the CopyState; filename
 * and options are kept so that BeginCopyFrom() can be called again.
 */
77 typedef struct FileFdwExecutionState
79 char *filename; /* file to read */
80 List *options; /* merged COPY options, excluding filename */
81 CopyState cstate; /* state of reading file */
82 } FileFdwExecutionState;
/*
 * The two externally visible entry points of this module: the handler, which
 * returns the callback table, and the validator, which checks generic
 * options.  Both use the PG_FUNCTION_ARGS calling convention and are
 * registered with PG_FUNCTION_INFO_V1.
 */
87 extern Datum file_fdw_handler(PG_FUNCTION_ARGS);
88 extern Datum file_fdw_validator(PG_FUNCTION_ARGS);
90 PG_FUNCTION_INFO_V1(file_fdw_handler);
91 PG_FUNCTION_INFO_V1(file_fdw_validator);
94 * FDW callback routines
/* Scan callbacks; file_fdw_handler() installs each of these in the FdwRoutine it returns. */
96 static void filePlanForeignScan(Oid foreigntableid,
99 static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es);
100 static void fileBeginForeignScan(ForeignScanState *node, int eflags);
101 static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
102 static void fileReScanForeignScan(ForeignScanState *node);
103 static void fileEndForeignScan(ForeignScanState *node);
/* Internal helpers for option lookup/validation and for cost estimation. */
108 static bool is_valid_option(const char *option, Oid context);
109 static void fileGetOptions(Oid foreigntableid,
110 char **filename, List **other_options);
111 static List *get_file_fdw_attribute_options(Oid relid);
112 static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
113 const char *filename,
114 Cost *startup_cost, Cost *total_cost);
118 * Foreign-data wrapper handler function: return a struct with pointers
119 * to my callback routines.
122 file_fdw_handler(PG_FUNCTION_ARGS)
124 FdwRoutine *fdwroutine = makeNode(FdwRoutine);
/*
 * Fill in the six scan callbacks implemented in this file, then hand the
 * node back to the caller as a pointer Datum.
 */
126 fdwroutine->PlanForeignScan = filePlanForeignScan;
127 fdwroutine->ExplainForeignScan = fileExplainForeignScan;
128 fdwroutine->BeginForeignScan = fileBeginForeignScan;
129 fdwroutine->IterateForeignScan = fileIterateForeignScan;
130 fdwroutine->ReScanForeignScan = fileReScanForeignScan;
131 fdwroutine->EndForeignScan = fileEndForeignScan;
133 PG_RETURN_POINTER(fdwroutine);
137 * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER,
138 * USER MAPPING or FOREIGN TABLE that uses file_fdw.
140 * Raise an ERROR if the option or its value is considered invalid.
143 file_fdw_validator(PG_FUNCTION_ARGS)
145 List *options_list = untransformRelOptions(PG_GETARG_DATUM(0));
146 Oid catalog = PG_GETARG_OID(1);
147 char *filename = NULL;
148 DefElem *force_not_null = NULL;
149 List *other_options = NIL;
/*
 * Argument 0 is the option array, expanded above into a List of DefElem;
 * argument 1 is the OID of the catalog the options are being attached to.
 * filename and force_not_null are tracked in their own variables so that
 * they stay out of other_options, which is what gets handed to
 * ProcessCopyOptions() below.
 */
153 * Only superusers are allowed to set options of a file_fdw foreign table.
154 * This is because the filename is one of those options, and we don't want
155 * non-superusers to be able to determine which file gets read.
157 * Putting this sort of permissions check in a validator is a bit of a
158 * crock, but there doesn't seem to be any other place that can enforce
159 * the check more cleanly.
161 * Note that the valid_options[] array disallows setting filename at any
162 * options level other than foreign table --- otherwise there'd still be a
165 if (catalog == ForeignTableRelationId && !superuser())
167 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
168 errmsg("only superuser can change options of a file_fdw foreign table")));
171 * Check that only options supported by file_fdw, and allowed for the
172 * current object type, are given.
174 foreach(cell, options_list)
176 DefElem *def = (DefElem *) lfirst(cell);
178 if (!is_valid_option(def->defname, catalog))
180 struct FileFdwOption *opt;
184 * Unknown option specified, complain about it. Provide a hint
185 * with list of valid options for the object.
187 initStringInfo(&buf);
188 for (opt = valid_options; opt->optname; opt++)
190 if (catalog == opt->optcontext)
191 appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "",
196 (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
197 errmsg("invalid option \"%s\"", def->defname),
198 errhint("Valid options in this context are: %s",
203 * Separate out filename and force_not_null, since ProcessCopyOptions
204 * won't accept them. (force_not_null only comes in a boolean
205 * per-column flavor here.)
207 if (strcmp(def->defname, "filename") == 0)
211 (errcode(ERRCODE_SYNTAX_ERROR),
212 errmsg("conflicting or redundant options")));
213 filename = defGetString(def);
215 else if (strcmp(def->defname, "force_not_null") == 0)
219 (errcode(ERRCODE_SYNTAX_ERROR),
220 errmsg("conflicting or redundant options")));
221 force_not_null = def;
222 /* Don't care what the value is, as long as it's a legal boolean */
223 (void) defGetBoolean(def);
226 other_options = lappend(other_options, def);
230 * Now apply the core COPY code's validation logic for more checks.
/*
 * NOTE(review): the NULL and true arguments presumably mean "no CopyState to
 * fill in" and "options are for COPY FROM" -- confirm against the
 * declaration of ProcessCopyOptions() in commands/copy.h.
 */
232 ProcessCopyOptions(NULL, true, other_options);
235 * Filename option is required for file_fdw foreign tables.
/* Tested after the loop because filename can appear anywhere in the option list. */
237 if (catalog == ForeignTableRelationId && filename == NULL)
239 (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
240 errmsg("filename is required for file_fdw foreign tables")));
246 * Check if the provided option is one of the valid options.
247 * context is the Oid of the catalog holding the object the option is for.
250 is_valid_option(const char *option, Oid context)
252 struct FileFdwOption *opt;
/*
 * Linear scan of valid_options[], stopping at the entry whose optname is
 * NULL.  An entry matches only when its catalog context equals 'context'
 * and its name compares equal to 'option' under strcmp(), i.e. the name
 * comparison is exact and case-sensitive.
 */
254 for (opt = valid_options; opt->optname; opt++)
256 if (context == opt->optcontext && strcmp(opt->optname, option) == 0)
263 * Fetch the options for a file_fdw foreign table.
265 * We have to separate out "filename" from the other options because
266 * it must not appear in the options list passed to the core COPY code.
269 fileGetOptions(Oid foreigntableid,
270 char **filename, List **other_options)
273 ForeignServer *server;
274 ForeignDataWrapper *wrapper;
/*
 * Outputs: *filename receives the string value of the "filename" option;
 * *other_options receives the merged option list with that entry removed.
 */
280 * Extract options from FDW objects. We ignore user mappings because
281 * file_fdw doesn't have any options that can be specified there.
283 * (XXX Actually, given the current contents of valid_options[], there's
284 * no point in examining anything except the foreign table's own options.
287 table = GetForeignTable(foreigntableid);
288 server = GetForeignServer(table->serverid);
289 wrapper = GetForeignDataWrapper(server->fdwid);
/* Merge order: wrapper options, then server, then table, then per-column options. */
292 options = list_concat(options, wrapper->options);
293 options = list_concat(options, server->options);
294 options = list_concat(options, table->options);
295 options = list_concat(options, get_file_fdw_attribute_options(foreigntableid));
298 * Separate out the filename.
304 DefElem *def = (DefElem *) lfirst(lc);
306 if (strcmp(def->defname, "filename") == 0)
308 *filename = defGetString(def);
/* Drop the filename cell from the list so it is not passed on in *other_options. */
309 options = list_delete_cell(options, lc, prev);
316 * The validator should have checked that a filename was included in the
317 * options, but check again, just in case.
/*
 * NOTE(review): this test is only meaningful if *filename was set to NULL
 * before the loop above -- confirm that initialisation is present.
 */
319 if (*filename == NULL)
320 elog(ERROR, "filename is required for file_fdw foreign tables");
322 *other_options = options;
326 * Retrieve per-column generic options from pg_attribute and construct a list
327 * of DefElems representing them.
329 * At the moment we only have "force_not_null", which should be combined into
330 * a single DefElem listing all such columns, since that's what COPY expects.
333 get_file_fdw_attribute_options(Oid relid)
339 List *fnncolumns = NIL;
/*
 * fnncolumns collects, as String nodes, the names of the columns whose
 * "force_not_null" option evaluates to true.  When it ends up non-empty the
 * result is a one-element list holding a single "force_not_null" DefElem
 * whose argument is that name list.
 */
341 rel = heap_open(relid, AccessShareLock);
342 tupleDesc = RelationGetDescr(rel);
343 natts = tupleDesc->natts;
345 /* Retrieve FDW options for all user-defined attributes. */
/* attnum is 1-based, hence the attnum - 1 index into tupleDesc->attrs. */
346 for (attnum = 1; attnum <= natts; attnum++)
348 Form_pg_attribute attr = tupleDesc->attrs[attnum - 1];
352 /* Skip dropped attributes. */
353 if (attr->attisdropped)
356 options = GetForeignColumnOptions(relid, attnum);
359 DefElem *def = (DefElem *) lfirst(lc);
361 if (strcmp(def->defname, "force_not_null") == 0)
363 if (defGetBoolean(def))
/* Copy the name: the list is returned after the relation has been closed. */
365 char *attname = pstrdup(NameStr(attr->attname));
367 fnncolumns = lappend(fnncolumns, makeString(attname));
370 /* maybe in future handle other options here */
374 heap_close(rel, AccessShareLock);
376 /* Return DefElem only when some column(s) have force_not_null */
377 if (fnncolumns != NIL)
378 return list_make1(makeDefElem("force_not_null", (Node *) fnncolumns));
384 * filePlanForeignScan
385 * Create possible access paths for a scan on the foreign table
387 * Currently we don't support any push-down feature, so there is only one
388 * possible access path, which simply returns all records in file order.
392 filePlanForeignScan(Oid foreigntableid,
/*
 * Three steps: look up the file name, estimate costs (which also sets
 * baserel->rows), and register a single ForeignPath on baserel.  The options
 * list returned by fileGetOptions() is not used any further here.
 */
401 /* Fetch options --- we only need filename at this point */
402 fileGetOptions(foreigntableid, &filename, &options);
404 /* Estimate costs and update baserel->rows */
405 estimate_costs(root, baserel, filename,
406 &startup_cost, &total_cost);
408 /* Create a ForeignPath node and add it as only possible path */
409 add_path(baserel, (Path *)
410 create_foreignscan_path(root, baserel,
414 NIL, /* no pathkeys */
415 NULL, /* no outer rel either */
417 NIL)); /* no fdw_private data */
420 * If data file was sorted, and we knew it somehow, we could insert
421 * appropriate pathkeys into the ForeignPath node to tell the planner that.
426 * fileExplainForeignScan
427 * Produce extra output for EXPLAIN
430 fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
/*
 * Emits a "Foreign File" property with the file name, plus a
 * "Foreign File Size" property when stat() on that file succeeds.  A failed
 * stat() is ignored: the size line is simply left out.
 */
435 /* Fetch options --- we only need filename at this point */
436 fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
437 &filename, &options);
439 ExplainPropertyText("Foreign File", filename, es);
441 /* Suppress file size if we're not showing cost details */
444 struct stat stat_buf;
/*
 * NOTE(review): st_size is narrowed to long for ExplainPropertyLong(); on
 * platforms where long is narrower than off_t a very large file could be
 * misreported -- confirm whether that matters here.
 */
446 if (stat(filename, &stat_buf) == 0)
447 ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size,
453 * fileBeginForeignScan
454 * Initiate access to the file by creating CopyState
457 fileBeginForeignScan(ForeignScanState *node, int eflags)
462 FileFdwExecutionState *festate;
/*
 * Looks up the table's options, opens a CopyState on the scanned relation,
 * and bundles file name, option list and CopyState into a palloc'd
 * FileFdwExecutionState that the other callbacks reach via node->fdw_state.
 */
465 * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL.
467 if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
470 /* Fetch options of foreign table */
471 fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
472 &filename, &options);
475 * Create CopyState from FDW options. We always acquire all columns, so
476 * as to match the expected ScanTupleSlot signature.
478 cstate = BeginCopyFrom(node->ss.ss_currentRelation,
484 * Save state in node->fdw_state. We must save enough information to call
485 * BeginCopyFrom() again.
487 festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
488 festate->filename = filename;
489 festate->options = options;
490 festate->cstate = cstate;
492 node->fdw_state = (void *) festate;
496 * fileIterateForeignScan
497 * Read next record from the data file and store it into the
498 * ScanTupleSlot as a virtual tuple
500 static TupleTableSlot *
501 fileIterateForeignScan(ForeignScanState *node)
503 FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
504 TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
506 ErrorContextCallback errcontext;
/*
 * errcontext is a local variable whose address is linked into the global
 * error_context_stack below, so it has to be unlinked again (by restoring
 * errcontext.previous) before this function returns.
 */
508 /* Set up callback to identify error line number. */
509 errcontext.callback = CopyFromErrorCallback;
510 errcontext.arg = (void *) festate->cstate;
511 errcontext.previous = error_context_stack;
512 error_context_stack = &errcontext;
515 * The protocol for loading a virtual tuple into a slot is first
516 * ExecClearTuple, then fill the values/isnull arrays, then
517 * ExecStoreVirtualTuple. If we don't find another row in the file, we
518 * just skip the last step, leaving the slot empty as required.
520 * We can pass ExprContext = NULL because we read all columns from the
521 * file, so no need to evaluate default expressions.
523 * We can also pass tupleOid = NULL because we don't allow oids for
526 ExecClearTuple(slot);
/* NextCopyFrom() writes straight into the slot's own values/isnull arrays. */
527 found = NextCopyFrom(festate->cstate, NULL,
528 slot->tts_values, slot->tts_isnull,
531 ExecStoreVirtualTuple(slot);
533 /* Remove error callback. */
534 error_context_stack = errcontext.previous;
541 * Finish scanning foreign table and dispose objects used for this scan
544 fileEndForeignScan(ForeignScanState *node)
546 FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
/* Closes the CopyState that fileBeginForeignScan() stored in the execution state. */
548 /* if festate is NULL, we are in EXPLAIN; nothing to do */
550 EndCopyFrom(festate->cstate);
554 * fileReScanForeignScan
555 * Rescan table, possibly with new parameters
558 fileReScanForeignScan(ForeignScanState *node)
560 FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
/*
 * Restart by closing the current CopyState and opening a fresh one on the
 * same relation; the new CopyState replaces festate->cstate.
 *
 * NOTE(review): festate is dereferenced without the NULL test that
 * fileEndForeignScan() documents for the EXPLAIN-only case -- confirm that a
 * rescan cannot be requested when fdw_state is NULL.
 */
562 EndCopyFrom(festate->cstate);
564 festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
571 * Estimate costs of scanning a foreign table.
573 * In addition to setting *startup_cost and *total_cost, this should
574 * update baserel->rows.
577 estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
578 const char *filename,
579 Cost *startup_cost, Cost *total_cost)
581 struct stat stat_buf;
/*
 * Inputs: filename is stat()'ed for its size; baserel supplies the row
 * width, the restriction clauses and their evaluation cost.
 * Outputs: *startup_cost, *total_cost, and baserel->rows.
 */
590 * Get size of the file. It might not be there at plan time, though, in
591 * which case we have to use a default estimate.
/* Fallback when stat() fails: treat the file as 10 blocks long. */
593 if (stat(filename, &stat_buf) < 0)
594 stat_buf.st_size = 10 * BLCKSZ;
597 * Convert size to pages for use in I/O cost estimate below.
/* Adding BLCKSZ - 1 before dividing rounds a partial block up to a full page. */
599 pages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
604 * Estimate the number of tuples in the file. We back into this estimate
605 * using the planner's idea of the relation width; which is bogus if not
606 * all columns are being read, not to mention that the text representation
607 * of a row probably isn't the same size as its internal representation.
610 tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
/* Tuple count = file size in bytes divided by the assumed bytes per tuple. */
612 ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
615 * Now estimate the number of rows returned by the scan after applying the
616 * baserestrictinfo quals. This is pretty bogus too, since the planner
617 * will have no stats about the relation, but it's better than nothing.
620 clauselist_selectivity(root,
621 baserel->baserestrictinfo,
626 nrows = clamp_row_est(nrows);
628 /* Save the output-rows estimate for the planner */
629 baserel->rows = nrows;
632 * Now estimate costs. We estimate costs almost the same way as
633 * cost_seqscan(), thus assuming that I/O costs are equivalent to a
634 * regular table file of the same size. However, we take per-tuple CPU
635 * costs as 10x of a seqscan, to account for the cost of parsing records.
637 run_cost += seq_page_cost * pages;
/*
 * Startup cost is just the one-time cost of the restriction quals.  The CPU
 * charge is applied to ntuples (every tuple read from the file), not to
 * nrows (the tuples that survive the quals).
 */
639 *startup_cost = baserel->baserestrictcost.startup;
640 cpu_per_tuple = cpu_tuple_cost * 10 + baserel->baserestrictcost.per_tuple;
641 run_cost += cpu_per_tuple * ntuples;
642 *total_cost = *startup_cost + run_cost;