granicus.if.org Git - postgresql/blob - contrib/file_fdw/file_fdw.c
Add GetForeignColumnOptions() to foreign.c, and add some documentation.
[postgresql] / contrib / file_fdw / file_fdw.c
1 /*-------------------------------------------------------------------------
2  *
3  * file_fdw.c
4  *                foreign-data wrapper for server-side flat files.
5  *
6  * Copyright (c) 2010-2012, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *                contrib/file_fdw/file_fdw.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14
15 #include <sys/stat.h>
16 #include <unistd.h>
17
18 #include "access/reloptions.h"
19 #include "catalog/pg_foreign_table.h"
20 #include "commands/copy.h"
21 #include "commands/defrem.h"
22 #include "commands/explain.h"
23 #include "foreign/fdwapi.h"
24 #include "foreign/foreign.h"
25 #include "miscadmin.h"
26 #include "nodes/makefuncs.h"
27 #include "optimizer/cost.h"
28 #include "optimizer/pathnode.h"
29 #include "utils/rel.h"
30
31 PG_MODULE_MAGIC;
32
33 /*
34  * Describes the valid options for objects that use this wrapper.
35  */
36 struct FileFdwOption
37 {
38         const char *optname;
39         Oid                     optcontext;             /* Oid of catalog in which option may appear */
40 };
41
42 /*
43  * Valid options for file_fdw.
44  * These options are based on the options for COPY FROM command.
45  * But note that force_not_null is handled as a boolean option attached to
46  * each column, not as a table option.
47  *
48  * Note: If you are adding new option for user mapping, you need to modify
49  * fileGetOptions(), which currently doesn't bother to look at user mappings.
50  */
51 static struct FileFdwOption valid_options[] = {
52         /* File options */
53         {"filename", ForeignTableRelationId},
54
55         /* Format options */
56         /* oids option is not supported */
57         {"format", ForeignTableRelationId},
58         {"header", ForeignTableRelationId},
59         {"delimiter", ForeignTableRelationId},
60         {"quote", ForeignTableRelationId},
61         {"escape", ForeignTableRelationId},
62         {"null", ForeignTableRelationId},
63         {"encoding", ForeignTableRelationId},
64         {"force_not_null", AttributeRelationId},
65
66         /*
67          * force_quote is not supported by file_fdw because it's for COPY TO.
68          */
69
70         /* Sentinel */
71         {NULL, InvalidOid}
72 };
73
74 /*
75  * FDW-specific information for ForeignScanState.fdw_state.
76  */
77 typedef struct FileFdwExecutionState
78 {
79         char       *filename;           /* file to read */
80         List       *options;            /* merged COPY options, excluding filename */
81         CopyState       cstate;                 /* state of reading file */
82 } FileFdwExecutionState;
83
84 /*
85  * SQL functions
86  */
87 extern Datum file_fdw_handler(PG_FUNCTION_ARGS);
88 extern Datum file_fdw_validator(PG_FUNCTION_ARGS);
89
90 PG_FUNCTION_INFO_V1(file_fdw_handler);
91 PG_FUNCTION_INFO_V1(file_fdw_validator);
92
93 /*
94  * FDW callback routines
95  */
96 static void filePlanForeignScan(Oid foreigntableid,
97                                         PlannerInfo *root,
98                                         RelOptInfo *baserel);
99 static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es);
100 static void fileBeginForeignScan(ForeignScanState *node, int eflags);
101 static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
102 static void fileReScanForeignScan(ForeignScanState *node);
103 static void fileEndForeignScan(ForeignScanState *node);
104
105 /*
106  * Helper functions
107  */
108 static bool is_valid_option(const char *option, Oid context);
109 static void fileGetOptions(Oid foreigntableid,
110                            char **filename, List **other_options);
111 static List *get_file_fdw_attribute_options(Oid relid);
112 static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
113                            const char *filename,
114                            Cost *startup_cost, Cost *total_cost);
115
116
117 /*
118  * Foreign-data wrapper handler function: return a struct with pointers
119  * to my callback routines.
120  */
121 Datum
122 file_fdw_handler(PG_FUNCTION_ARGS)
123 {
124         FdwRoutine *fdwroutine = makeNode(FdwRoutine);
125
126         fdwroutine->PlanForeignScan = filePlanForeignScan;
127         fdwroutine->ExplainForeignScan = fileExplainForeignScan;
128         fdwroutine->BeginForeignScan = fileBeginForeignScan;
129         fdwroutine->IterateForeignScan = fileIterateForeignScan;
130         fdwroutine->ReScanForeignScan = fileReScanForeignScan;
131         fdwroutine->EndForeignScan = fileEndForeignScan;
132
133         PG_RETURN_POINTER(fdwroutine);
134 }
135
136 /*
137  * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER,
138  * USER MAPPING or FOREIGN TABLE that uses file_fdw.
139  *
140  * Raise an ERROR if the option or its value is considered invalid.
141  */
142 Datum
143 file_fdw_validator(PG_FUNCTION_ARGS)
144 {
145         List       *options_list = untransformRelOptions(PG_GETARG_DATUM(0));
146         Oid                     catalog = PG_GETARG_OID(1);
147         char       *filename = NULL;
148         DefElem    *force_not_null = NULL;
149         List       *other_options = NIL;
150         ListCell   *cell;
151
152         /*
153          * Only superusers are allowed to set options of a file_fdw foreign table.
154          * This is because the filename is one of those options, and we don't want
155          * non-superusers to be able to determine which file gets read.
156          *
157          * Putting this sort of permissions check in a validator is a bit of a
158          * crock, but there doesn't seem to be any other place that can enforce
159          * the check more cleanly.
160          *
161          * Note that the valid_options[] array disallows setting filename at any
162          * options level other than foreign table --- otherwise there'd still be a
163          * security hole.
164          */
165         if (catalog == ForeignTableRelationId && !superuser())
166                 ereport(ERROR,
167                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
168                                  errmsg("only superuser can change options of a file_fdw foreign table")));
169
170         /*
171          * Check that only options supported by file_fdw, and allowed for the
172          * current object type, are given.
173          */
174         foreach(cell, options_list)
175         {
176                 DefElem    *def = (DefElem *) lfirst(cell);
177
178                 if (!is_valid_option(def->defname, catalog))
179                 {
180                         struct FileFdwOption *opt;
181                         StringInfoData buf;
182
183                         /*
184                          * Unknown option specified, complain about it. Provide a hint
185                          * with list of valid options for the object.
186                          */
187                         initStringInfo(&buf);
188                         for (opt = valid_options; opt->optname; opt++)
189                         {
190                                 if (catalog == opt->optcontext)
191                                         appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "",
192                                                                          opt->optname);
193                         }
194
195                         ereport(ERROR,
196                                         (errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
197                                          errmsg("invalid option \"%s\"", def->defname),
198                                          errhint("Valid options in this context are: %s",
199                                                          buf.data)));
200                 }
201
202                 /*
203                  * Separate out filename and force_not_null, since ProcessCopyOptions
204                  * won't accept them.  (force_not_null only comes in a boolean
205                  * per-column flavor here.)
206                  */
207                 if (strcmp(def->defname, "filename") == 0)
208                 {
209                         if (filename)
210                                 ereport(ERROR,
211                                                 (errcode(ERRCODE_SYNTAX_ERROR),
212                                                  errmsg("conflicting or redundant options")));
213                         filename = defGetString(def);
214                 }
215                 else if (strcmp(def->defname, "force_not_null") == 0)
216                 {
217                         if (force_not_null)
218                                 ereport(ERROR,
219                                                 (errcode(ERRCODE_SYNTAX_ERROR),
220                                                  errmsg("conflicting or redundant options")));
221                         force_not_null = def;
222                         /* Don't care what the value is, as long as it's a legal boolean */
223                         (void) defGetBoolean(def);
224                 }
225                 else
226                         other_options = lappend(other_options, def);
227         }
228
229         /*
230          * Now apply the core COPY code's validation logic for more checks.
231          */
232         ProcessCopyOptions(NULL, true, other_options);
233
234         /*
235          * Filename option is required for file_fdw foreign tables.
236          */
237         if (catalog == ForeignTableRelationId && filename == NULL)
238                 ereport(ERROR,
239                                 (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
240                                  errmsg("filename is required for file_fdw foreign tables")));
241
242         PG_RETURN_VOID();
243 }
244
245 /*
246  * Check if the provided option is one of the valid options.
247  * context is the Oid of the catalog holding the object the option is for.
248  */
249 static bool
250 is_valid_option(const char *option, Oid context)
251 {
252         struct FileFdwOption *opt;
253
254         for (opt = valid_options; opt->optname; opt++)
255         {
256                 if (context == opt->optcontext && strcmp(opt->optname, option) == 0)
257                         return true;
258         }
259         return false;
260 }
261
262 /*
263  * Fetch the options for a file_fdw foreign table.
264  *
265  * We have to separate out "filename" from the other options because
266  * it must not appear in the options list passed to the core COPY code.
267  */
268 static void
269 fileGetOptions(Oid foreigntableid,
270                            char **filename, List **other_options)
271 {
272         ForeignTable *table;
273         ForeignServer *server;
274         ForeignDataWrapper *wrapper;
275         List       *options;
276         ListCell   *lc,
277                            *prev;
278
279         /*
280          * Extract options from FDW objects.  We ignore user mappings because
281          * file_fdw doesn't have any options that can be specified there.
282          *
283          * (XXX Actually, given the current contents of valid_options[], there's
284          * no point in examining anything except the foreign table's own options.
285          * Simplify?)
286          */
287         table = GetForeignTable(foreigntableid);
288         server = GetForeignServer(table->serverid);
289         wrapper = GetForeignDataWrapper(server->fdwid);
290
291         options = NIL;
292         options = list_concat(options, wrapper->options);
293         options = list_concat(options, server->options);
294         options = list_concat(options, table->options);
295         options = list_concat(options, get_file_fdw_attribute_options(foreigntableid));
296
297         /*
298          * Separate out the filename.
299          */
300         *filename = NULL;
301         prev = NULL;
302         foreach(lc, options)
303         {
304                 DefElem    *def = (DefElem *) lfirst(lc);
305
306                 if (strcmp(def->defname, "filename") == 0)
307                 {
308                         *filename = defGetString(def);
309                         options = list_delete_cell(options, lc, prev);
310                         break;
311                 }
312                 prev = lc;
313         }
314
315         /*
316          * The validator should have checked that a filename was included in the
317          * options, but check again, just in case.
318          */
319         if (*filename == NULL)
320                 elog(ERROR, "filename is required for file_fdw foreign tables");
321
322         *other_options = options;
323 }
324
325 /*
326  * Retrieve per-column generic options from pg_attribute and construct a list
327  * of DefElems representing them.
328  *
329  * At the moment we only have "force_not_null", which should be combined into
330  * a single DefElem listing all such columns, since that's what COPY expects.
331  */
332 static List *
333 get_file_fdw_attribute_options(Oid relid)
334 {
335         Relation        rel;
336         TupleDesc       tupleDesc;
337         AttrNumber      natts;
338         AttrNumber      attnum;
339         List       *fnncolumns = NIL;
340
341         rel = heap_open(relid, AccessShareLock);
342         tupleDesc = RelationGetDescr(rel);
343         natts = tupleDesc->natts;
344
345         /* Retrieve FDW options for all user-defined attributes. */
346         for (attnum = 1; attnum <= natts; attnum++)
347         {
348                 Form_pg_attribute attr = tupleDesc->attrs[attnum - 1];
349                 List       *options;
350                 ListCell   *lc;
351
352                 /* Skip dropped attributes. */
353                 if (attr->attisdropped)
354                         continue;
355
356                 options = GetForeignColumnOptions(relid, attnum);
357                 foreach(lc, options)
358                 {
359                         DefElem    *def = (DefElem *) lfirst(lc);
360
361                         if (strcmp(def->defname, "force_not_null") == 0)
362                         {
363                                 if (defGetBoolean(def))
364                                 {
365                                         char   *attname = pstrdup(NameStr(attr->attname));
366
367                                         fnncolumns = lappend(fnncolumns, makeString(attname));
368                                 }
369                         }
370                         /* maybe in future handle other options here */
371                 }
372         }
373
374         heap_close(rel, AccessShareLock);
375
376         /* Return DefElem only when some column(s) have force_not_null */
377         if (fnncolumns != NIL)
378                 return list_make1(makeDefElem("force_not_null", (Node *) fnncolumns));
379         else
380                 return NIL;
381 }
382
383 /*
384  * filePlanForeignScan
385  *              Create possible access paths for a scan on the foreign table
386  *
387  *              Currently we don't support any push-down feature, so there is only one
388  *              possible access path, which simply returns all records in the order in
389  *              the data file.
390  */
391 static void
392 filePlanForeignScan(Oid foreigntableid,
393                                         PlannerInfo *root,
394                                         RelOptInfo *baserel)
395 {
396         char       *filename;
397         List       *options;
398         Cost            startup_cost;
399         Cost            total_cost;
400
401         /* Fetch options --- we only need filename at this point */
402         fileGetOptions(foreigntableid, &filename, &options);
403
404         /* Estimate costs and update baserel->rows */
405         estimate_costs(root, baserel, filename,
406                                    &startup_cost, &total_cost);
407
408         /* Create a ForeignPath node and add it as only possible path */
409         add_path(baserel, (Path *)
410                          create_foreignscan_path(root, baserel,
411                                                                          baserel->rows,
412                                                                          startup_cost,
413                                                                          total_cost,
414                                                                          NIL, /* no pathkeys */
415                                                                          NULL, /* no outer rel either */
416                                                                          NIL,
417                                                                          NIL)); /* no fdw_private data */
418
419         /*
420          * If data file was sorted, and we knew it somehow, we could insert
421          * appropriate pathkeys into the ForeignPath node to tell the planner that.
422          */
423 }
424
425 /*
426  * fileExplainForeignScan
427  *              Produce extra output for EXPLAIN
428  */
429 static void
430 fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
431 {
432         char       *filename;
433         List       *options;
434
435         /* Fetch options --- we only need filename at this point */
436         fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
437                                    &filename, &options);
438
439         ExplainPropertyText("Foreign File", filename, es);
440
441         /* Suppress file size if we're not showing cost details */
442         if (es->costs)
443         {
444                 struct stat stat_buf;
445
446                 if (stat(filename, &stat_buf) == 0)
447                         ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size,
448                                                                 es);
449         }
450 }
451
452 /*
453  * fileBeginForeignScan
454  *              Initiate access to the file by creating CopyState
455  */
456 static void
457 fileBeginForeignScan(ForeignScanState *node, int eflags)
458 {
459         char       *filename;
460         List       *options;
461         CopyState       cstate;
462         FileFdwExecutionState *festate;
463
464         /*
465          * Do nothing in EXPLAIN (no ANALYZE) case.  node->fdw_state stays NULL.
466          */
467         if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
468                 return;
469
470         /* Fetch options of foreign table */
471         fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
472                                    &filename, &options);
473
474         /*
475          * Create CopyState from FDW options.  We always acquire all columns, so
476          * as to match the expected ScanTupleSlot signature.
477          */
478         cstate = BeginCopyFrom(node->ss.ss_currentRelation,
479                                                    filename,
480                                                    NIL,
481                                                    options);
482
483         /*
484          * Save state in node->fdw_state.  We must save enough information to call
485          * BeginCopyFrom() again.
486          */
487         festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
488         festate->filename = filename;
489         festate->options = options;
490         festate->cstate = cstate;
491
492         node->fdw_state = (void *) festate;
493 }
494
495 /*
496  * fileIterateForeignScan
497  *              Read next record from the data file and store it into the
498  *              ScanTupleSlot as a virtual tuple
499  */
500 static TupleTableSlot *
501 fileIterateForeignScan(ForeignScanState *node)
502 {
503         FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
504         TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
505         bool            found;
506         ErrorContextCallback errcontext;
507
508         /* Set up callback to identify error line number. */
509         errcontext.callback = CopyFromErrorCallback;
510         errcontext.arg = (void *) festate->cstate;
511         errcontext.previous = error_context_stack;
512         error_context_stack = &errcontext;
513
514         /*
515          * The protocol for loading a virtual tuple into a slot is first
516          * ExecClearTuple, then fill the values/isnull arrays, then
517          * ExecStoreVirtualTuple.  If we don't find another row in the file, we
518          * just skip the last step, leaving the slot empty as required.
519          *
520          * We can pass ExprContext = NULL because we read all columns from the
521          * file, so no need to evaluate default expressions.
522          *
523          * We can also pass tupleOid = NULL because we don't allow oids for
524          * foreign tables.
525          */
526         ExecClearTuple(slot);
527         found = NextCopyFrom(festate->cstate, NULL,
528                                                  slot->tts_values, slot->tts_isnull,
529                                                  NULL);
530         if (found)
531                 ExecStoreVirtualTuple(slot);
532
533         /* Remove error callback. */
534         error_context_stack = errcontext.previous;
535
536         return slot;
537 }
538
539 /*
540  * fileEndForeignScan
541  *              Finish scanning foreign table and dispose objects used for this scan
542  */
543 static void
544 fileEndForeignScan(ForeignScanState *node)
545 {
546         FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
547
548         /* if festate is NULL, we are in EXPLAIN; nothing to do */
549         if (festate)
550                 EndCopyFrom(festate->cstate);
551 }
552
553 /*
554  * fileReScanForeignScan
555  *              Rescan table, possibly with new parameters
556  */
557 static void
558 fileReScanForeignScan(ForeignScanState *node)
559 {
560         FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
561
562         EndCopyFrom(festate->cstate);
563
564         festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
565                                                                         festate->filename,
566                                                                         NIL,
567                                                                         festate->options);
568 }
569
570 /*
571  * Estimate costs of scanning a foreign table.
572  *
573  * In addition to setting *startup_cost and *total_cost, this should
574  * update baserel->rows.
575  */
576 static void
577 estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
578                            const char *filename,
579                            Cost *startup_cost, Cost *total_cost)
580 {
581         struct stat stat_buf;
582         BlockNumber pages;
583         int                     tuple_width;
584         double          ntuples;
585         double          nrows;
586         Cost            run_cost = 0;
587         Cost            cpu_per_tuple;
588
589         /*
590          * Get size of the file.  It might not be there at plan time, though, in
591          * which case we have to use a default estimate.
592          */
593         if (stat(filename, &stat_buf) < 0)
594                 stat_buf.st_size = 10 * BLCKSZ;
595
596         /*
597          * Convert size to pages for use in I/O cost estimate below.
598          */
599         pages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
600         if (pages < 1)
601                 pages = 1;
602
603         /*
604          * Estimate the number of tuples in the file.  We back into this estimate
605          * using the planner's idea of the relation width; which is bogus if not
606          * all columns are being read, not to mention that the text representation
607          * of a row probably isn't the same size as its internal representation.
608          * FIXME later.
609          */
610         tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
611
612         ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
613
614         /*
615          * Now estimate the number of rows returned by the scan after applying the
616          * baserestrictinfo quals.      This is pretty bogus too, since the planner
617          * will have no stats about the relation, but it's better than nothing.
618          */
619         nrows = ntuples *
620                 clauselist_selectivity(root,
621                                                            baserel->baserestrictinfo,
622                                                            0,
623                                                            JOIN_INNER,
624                                                            NULL);
625
626         nrows = clamp_row_est(nrows);
627
628         /* Save the output-rows estimate for the planner */
629         baserel->rows = nrows;
630
631         /*
632          * Now estimate costs.  We estimate costs almost the same way as
633          * cost_seqscan(), thus assuming that I/O costs are equivalent to a
634          * regular table file of the same size.  However, we take per-tuple CPU
635          * costs as 10x of a seqscan, to account for the cost of parsing records.
636          */
637         run_cost += seq_page_cost * pages;
638
639         *startup_cost = baserel->baserestrictcost.startup;
640         cpu_per_tuple = cpu_tuple_cost * 10 + baserel->baserestrictcost.per_tuple;
641         run_cost += cpu_per_tuple * ntuples;
642         *total_cost = *startup_cost + run_cost;
643 }