From 8e91e12bc3af85ba2287866669268f6825d2cc03 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 29 Sep 2016 13:32:27 -0400 Subject: [PATCH] Allow contrib/file_fdw to read from a program, like COPY FROM PROGRAM. This patch just exposes COPY's FROM PROGRAM option in contrib/file_fdw. There don't seem to be any security issues with that that are any worse than what already exist with file_fdw and COPY; as in the existing cases, only superusers are allowed to control what gets executed. A regression test case might be nice here, but choosing a 100% portable command to run is hard. (We haven't got a test for COPY FROM PROGRAM itself, either.) Corey Huinker and Adam Gomaa, reviewed by Amit Langote Discussion: --- contrib/file_fdw/file_fdw.c | 131 ++++++++++++++++-------- contrib/file_fdw/output/file_fdw.source | 6 +- doc/src/sgml/file-fdw.sgml | 66 ++++++++---- 3 files changed, 137 insertions(+), 66 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index b471991318..d325f53467 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * file_fdw.c - * foreign-data wrapper for server-side flat files. + * foreign-data wrapper for server-side flat files (or programs). * * Copyright (c) 2010-2016, PostgreSQL Global Development Group * @@ -57,8 +57,9 @@ struct FileFdwOption * fileGetOptions(), which currently doesn't bother to look at user mappings. */ static const struct FileFdwOption valid_options[] = { - /* File options */ + /* Data source options */ {"filename", ForeignTableRelationId}, + {"program", ForeignTableRelationId}, /* Format options */ /* oids option is not supported */ @@ -85,10 +86,12 @@ static const struct FileFdwOption valid_options[] = { */ typedef struct FileFdwPlanState { - char *filename; /* file to read */ - List *options; /* merged COPY options, excluding filename */ + char *filename; /* file or program to read from */ + bool is_program; /* true if filename represents an OS command */ + List *options; /* merged COPY options, excluding filename and + * is_program */ BlockNumber pages; /* estimate of file's physical size */ - double ntuples; /* estimate of number of rows in file */ + double ntuples; /* estimate of number of data rows */ } FileFdwPlanState; /* @@ -96,9 +99,11 @@ typedef struct FileFdwPlanState */ typedef struct FileFdwExecutionState { - char *filename; /* file to read */ - List *options; /* merged COPY options, excluding filename */ - CopyState cstate; /* state of reading file */ + char *filename; /* file or program to read from */ + bool is_program; /* true if filename represents an OS command */ + List *options; /* merged COPY options, excluding filename and + * is_program */ + CopyState cstate; /* COPY execution state */ } FileFdwExecutionState; /* @@ -139,7 +144,9 @@ static bool fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options); + char **filename, + bool *is_program, + List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, @@ -196,16 +203,16 @@ file_fdw_validator(PG_FUNCTION_ARGS) /* * Only superusers are allowed to set options of a file_fdw foreign table. - * This is because the filename is one of those options, and we don't want - * non-superusers to be able to determine which file gets read. + * This is because we don't want non-superusers to be able to control + * which file gets read or which program gets executed. * * Putting this sort of permissions check in a validator is a bit of a * crock, but there doesn't seem to be any other place that can enforce * the check more cleanly. * - * Note that the valid_options[] array disallows setting filename at any - * options level other than foreign table --- otherwise there'd still be a - * security hole. + * Note that the valid_options[] array disallows setting filename and + * program at any options level other than foreign table --- otherwise + * there'd still be a security hole. */ if (catalog == ForeignTableRelationId && !superuser()) ereport(ERROR, @@ -247,11 +254,11 @@ file_fdw_validator(PG_FUNCTION_ARGS) } /* - * Separate out filename and column-specific options, since + * Separate out filename, program, and column-specific options, since * ProcessCopyOptions won't accept them. */ - - if (strcmp(def->defname, "filename") == 0) + if (strcmp(def->defname, "filename") == 0 || + strcmp(def->defname, "program") == 0) { if (filename) ereport(ERROR, @@ -296,12 +303,13 @@ file_fdw_validator(PG_FUNCTION_ARGS) ProcessCopyOptions(NULL, NULL, true, other_options); /* - * Filename option is required for file_fdw foreign tables. + * Either filename or program option is required for file_fdw foreign + * tables. */ if (catalog == ForeignTableRelationId && filename == NULL) ereport(ERROR, (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), - errmsg("filename is required for file_fdw foreign tables"))); + errmsg("either filename or program is required for file_fdw foreign tables"))); PG_RETURN_VOID(); } @@ -326,12 +334,12 @@ is_valid_option(const char *option, Oid context) /* * Fetch the options for a file_fdw foreign table. * - * We have to separate out "filename" from the other options because - * it must not appear in the options list passed to the core COPY code. + * We have to separate out filename/program from the other options because + * those must not appear in the options list passed to the core COPY code. */ static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options) + char **filename, bool *is_program, List **other_options) { ForeignTable *table; ForeignServer *server; @@ -359,9 +367,11 @@ fileGetOptions(Oid foreigntableid, options = list_concat(options, get_file_fdw_attribute_options(foreigntableid)); /* - * Separate out the filename. + * Separate out the filename or program option (we assume there is only + * one). */ *filename = NULL; + *is_program = false; prev = NULL; foreach(lc, options) { @@ -373,15 +383,22 @@ fileGetOptions(Oid foreigntableid, options = list_delete_cell(options, lc, prev); break; } + else if (strcmp(def->defname, "program") == 0) + { + *filename = defGetString(def); + *is_program = true; + options = list_delete_cell(options, lc, prev); + break; + } prev = lc; } /* - * The validator should have checked that a filename was included in the - * options, but check again, just in case. + * The validator should have checked that filename or program was included + * in the options, but check again, just in case. */ if (*filename == NULL) - elog(ERROR, "filename is required for file_fdw foreign tables"); + elog(ERROR, "either filename or program is required for file_fdw foreign tables"); *other_options = options; } @@ -475,12 +492,15 @@ fileGetForeignRelSize(PlannerInfo *root, FileFdwPlanState *fdw_private; /* - * Fetch options. We only need filename at this point, but we might as - * well get everything and not need to re-fetch it later in planning. + * Fetch options. We only need filename (or program) at this point, but + * we might as well get everything and not need to re-fetch it later in + * planning. */ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); fileGetOptions(foreigntableid, - &fdw_private->filename, &fdw_private->options); + &fdw_private->filename, + &fdw_private->is_program, + &fdw_private->options); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -583,20 +603,25 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; + bool is_program; List *options; - /* Fetch options --- we only need filename at this point */ + /* Fetch options --- we only need filename and is_program at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &is_program, &options); - ExplainPropertyText("Foreign File", filename, es); + if (is_program) + ExplainPropertyText("Foreign Program", filename, es); + else + ExplainPropertyText("Foreign File", filename, es); /* Suppress file size if we're not showing cost details */ if (es->costs) { struct stat stat_buf; - if (stat(filename, &stat_buf) == 0) + if (!is_program && + stat(filename, &stat_buf) == 0) ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size, es); } @@ -611,6 +636,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; + bool is_program; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -623,7 +649,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &is_program, &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -635,7 +661,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) cstate = BeginCopyFrom(NULL, node->ss.ss_currentRelation, filename, - false, + is_program, NIL, options); @@ -645,6 +671,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) */ festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState)); festate->filename = filename; + festate->is_program = is_program; festate->options = options; festate->cstate = cstate; @@ -709,7 +736,7 @@ fileReScanForeignScan(ForeignScanState *node) festate->cstate = BeginCopyFrom(NULL, node->ss.ss_currentRelation, festate->filename, - false, + festate->is_program, NIL, festate->options); } @@ -738,11 +765,22 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; + bool is_program; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &options); + fileGetOptions(RelationGetRelid(relation), &filename, &is_program, &options); + + /* + * If this is a program instead of a file, just return false to skip + * analyzing the table. We could run the program and collect stats on + * whatever it currently returns, but it seems likely that in such cases + * the output would be too volatile for the stats to be useful. Maybe + * there should be an option to enable doing this? + */ + if (is_program) + return false; /* * Get size of the file. (XXX if we fail here, would it be better to just @@ -769,8 +807,8 @@ fileAnalyzeForeignTable(Relation relation, /* * fileIsForeignScanParallelSafe - * Reading a file in a parallel worker should work just the same as - * reading it in the leader, so mark scans safe. + * Reading a file, or external program, in a parallel worker should work + * just the same as reading it in the leader, so mark scans safe. */ static bool fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, @@ -916,9 +954,10 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel, /* * Get size of the file. It might not be there at plan time, though, in - * which case we have to use a default estimate. + * which case we have to use a default estimate. We also have to fall + * back to the default if using a program as the input. */ - if (stat(fdw_private->filename, &stat_buf) < 0) + if (fdw_private->is_program || stat(fdw_private->filename, &stat_buf) < 0) stat_buf.st_size = 10 * BLCKSZ; /* @@ -1000,6 +1039,11 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, * that I/O costs are equivalent to a regular table file of the same size. * However, we take per-tuple CPU costs as 10x of a seqscan, to account * for the cost of parsing records. + * + * In the case of a program source, this calculation is even more divorced + * from reality, but we have no good alternative; and it's not clear that + * the numbers we produce here matter much anyway, since there's only one + * access path for the rel. */ run_cost += seq_page_cost * pages; @@ -1036,6 +1080,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; + bool is_program; List *options; CopyState cstate; ErrorContextCallback errcallback; @@ -1050,12 +1095,12 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &options); + fileGetOptions(RelationGetRelid(onerel), &filename, &is_program, &options); /* * Create CopyState from FDW options. */ - cstate = BeginCopyFrom(NULL, onerel, filename, false, NIL, options); + cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NIL, options); /* * Use per-tuple memory context to prevent leak of memory used to read diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 6fa54409b9..01e2690a82 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -76,7 +76,7 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null ' '); -- ERROR ERROR: COPY null representation cannot use newline or carriage return CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR -ERROR: filename is required for file_fdw foreign tables +ERROR: either filename or program is required for file_fdw foreign tables CREATE FOREIGN TABLE agg_text ( a int2 CHECK (a >= 0), b float4 @@ -132,7 +132,7 @@ ERROR: invalid option "force_not_null" HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR ERROR: invalid option "force_not_null" -HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +HINT: Valid options in this context are: filename, program, format, header, delimiter, quote, escape, null, encoding -- force_null is not allowed to be specified at any foreign object level: ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_null '*'); -- ERROR ERROR: invalid option "force_null" @@ -145,7 +145,7 @@ ERROR: invalid option "force_null" HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_null '*'); -- ERROR ERROR: invalid option "force_null" -HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +HINT: Valid options in this context are: filename, program, format, header, delimiter, quote, escape, null, encoding -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; a | b diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index d3b39aa120..309a303e03 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -10,10 +10,11 @@ The file_fdw module provides the foreign-data wrapper file_fdw, which can be used to access data - files in the server's file system. Data files must be in a format + files in the server's file system, or to execute programs on the server + and read their output. The data file or program output must be in a format that can be read by COPY FROM; see for details. - Access to such data files is currently read-only. + Access to data files is currently read-only. @@ -27,7 +28,22 @@ - Specifies the file to be read. Required. Must be an absolute path name. + Specifies the file to be read. Must be an absolute path name. + Either filename or program must be + specified, but not both. + + + + + + program + + + + Specifies the command to be executed. The standard output of this + command will be read as though COPY FROM PROGRAM were used. + Either program or filename must be + specified, but not both. @@ -37,7 +53,7 @@ - Specifies the file's format, + Specifies the data format, the same as COPY's FORMAT option. @@ -48,7 +64,7 @@ - Specifies whether the file has a header line, + Specifies whether the data has a header line, the same as COPY's HEADER option. @@ -59,7 +75,7 @@ - Specifies the file's delimiter character, + Specifies the data delimiter character, the same as COPY's DELIMITER option. @@ -70,7 +86,7 @@ - Specifies the file's quote character, + Specifies the data quote character, the same as COPY's QUOTE option. @@ -81,7 +97,7 @@ - Specifies the file's escape character, + Specifies the data escape character, the same as COPY's ESCAPE option. @@ -92,7 +108,7 @@ - Specifies the file's null string, + Specifies the data null string, the same as COPY's NULL option. @@ -103,7 +119,7 @@ - Specifies the file's encoding, + Specifies the data encoding, the same as COPY's ENCODING option. @@ -112,11 +128,11 @@ - Note that while COPY allows options such as OIDS and HEADER - to be specified without a corresponding value, the foreign data wrapper + Note that while COPY allows options such as HEADER + to be specified without a corresponding value, the foreign table option syntax requires a value to be present in all cases. To activate - COPY options normally supplied without a value, you can - instead pass the value TRUE. + COPY options typically written without a value, you can pass + the value TRUE, since all such options are Booleans. @@ -133,7 +149,7 @@ This is a Boolean option. If true, it specifies that values of the column should not be matched against the null string (that is, the - file-level null option). This has the same effect + table-level null option). This has the same effect as listing the column in COPY's FORCE_NOT_NULL option. @@ -171,14 +187,24 @@ Changing table-level options requires superuser privileges, for security - reasons: only a superuser should be able to determine which file is read. - In principle non-superusers could be allowed to change the other options, - but that's not supported at present. + reasons: only a superuser should be able to control which file is read + or which program is run. In principle non-superusers could be allowed to + change the other options, but that's not supported at present. + + + + When specifying the program option, keep in mind that the option + string is executed by the shell. If you need to pass any arguments to the + command that come from an untrusted source, you must be careful to strip or + escape any characters that might have special meaning to the shell. + For security reasons, it is best to use a fixed command string, or at least + avoid passing any user input in it. For a foreign table using file_fdw, EXPLAIN shows - the name of the file to be read. Unless COSTS OFF is + the name of the file to be read or program to be run. + For a file, unless COSTS OFF is specified, the file size (in bytes) is shown as well. @@ -186,7 +212,7 @@ Create a Foreign Table for PostgreSQL CSV Logs - One of the obvious uses for the file_fdw is to make + One of the obvious uses for file_fdw is to make the PostgreSQL activity log available as a table for querying. To do this, first you must be logging to a CSV file, which here we will call pglog.csv. First, install file_fdw -- 2.40.0