From 3b5e03dca2afea7a2c12dbc8605175d0568b5555 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Tue, 4 Mar 2014 17:31:59 -0500 Subject: [PATCH] Provide a FORCE NULL option to COPY in CSV mode. This forces an input field containing the quoted null string to be returned as a NULL. Without this option, only unquoted null strings behave this way. This helps where some CSV producers insist on quoting every field, whether or not it is needed. The option takes a list of fields, and only applies to those columns. There is an equivalent column-level option added to file_fdw. Ian Barwick, with some tweaking by Andrew Dunstan, reviewed by Payal Singh. --- contrib/file_fdw/data/text.csv | 9 +-- contrib/file_fdw/file_fdw.c | 71 +++++++++++++++++++----- contrib/file_fdw/input/file_fdw.source | 17 +++++- contrib/file_fdw/output/file_fdw.source | 41 +++++++++++--- doc/src/sgml/file-fdw.sgml | 21 ++++++- doc/src/sgml/ref/copy.sgml | 19 ++++++- src/backend/commands/copy.c | 74 +++++++++++++++++++++++-- src/backend/parser/gram.y | 4 ++ src/test/regress/expected/copy2.out | 48 ++++++++++++++++ src/test/regress/sql/copy2.sql | 39 +++++++++++++ 10 files changed, 308 insertions(+), 35 deletions(-) diff --git a/contrib/file_fdw/data/text.csv b/contrib/file_fdw/data/text.csv index ed348a9e84..f55d9cf14d 100644 --- a/contrib/file_fdw/data/text.csv +++ b/contrib/file_fdw/data/text.csv @@ -1,4 +1,5 @@ -AAA,aaa -XYZ,xyz -NULL,NULL -ABC,abc +AAA,aaa,123,"" +XYZ,xyz,"",321 +NULL,NULL,NULL,NULL +NULL,NULL,"NULL",NULL +ABC,abc,"","" diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 5639f4de8d..7fb1dbcff3 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -48,9 +48,9 @@ struct FileFdwOption /* * Valid options for file_fdw. - * These options are based on the options for COPY FROM command. - * But note that force_not_null is handled as a boolean option attached to - * each column, not as a table option. + * These options are based on the options for the COPY FROM command. + * But note that force_not_null and force_null are handled as boolean options + * attached to a column, not as table options. * * Note: If you are adding new option for user mapping, you need to modify * fileGetOptions(), which currently doesn't bother to look at user mappings. @@ -69,7 +69,7 @@ static const struct FileFdwOption valid_options[] = { {"null", ForeignTableRelationId}, {"encoding", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, - + {"force_null", AttributeRelationId}, /* * force_quote is not supported by file_fdw because it's for COPY TO. */ @@ -187,6 +187,7 @@ file_fdw_validator(PG_FUNCTION_ARGS) Oid catalog = PG_GETARG_OID(1); char *filename = NULL; DefElem *force_not_null = NULL; + DefElem *force_null = NULL; List *other_options = NIL; ListCell *cell; @@ -243,10 +244,10 @@ file_fdw_validator(PG_FUNCTION_ARGS) } /* - * Separate out filename and force_not_null, since ProcessCopyOptions - * won't accept them. (force_not_null only comes in a boolean - * per-column flavor here.) + * Separate out filename and column-specific options, since + * ProcessCopyOptions won't accept them. */ + if (strcmp(def->defname, "filename") == 0) { if (filename) @@ -255,16 +256,42 @@ file_fdw_validator(PG_FUNCTION_ARGS) errmsg("conflicting or redundant options"))); filename = defGetString(def); } + /* + * force_not_null is a boolean option; after validation we can discard + * it - it will be retrieved later in get_file_fdw_attribute_options() + */ else if (strcmp(def->defname, "force_not_null") == 0) { if (force_not_null) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("conflicting or redundant options"))); + errmsg("conflicting or redundant options"), + errhint("option \"force_not_null\" supplied more than once for a column"))); + if(force_null) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errhint("option \"force_not_null\" cannot be used together with \"force_null\""))); force_not_null = def; /* Don't care what the value is, as long as it's a legal boolean */ (void) defGetBoolean(def); } + /* See comments for force_not_null above */ + else if (strcmp(def->defname, "force_null") == 0) + { + if (force_null) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errhint("option \"force_null\" supplied more than once for a column"))); + if(force_not_null) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errhint("option \"force_null\" cannot be used together with \"force_not_null\""))); + force_null = def; + (void) defGetBoolean(def); + } else other_options = lappend(other_options, def); } @@ -369,8 +396,9 @@ fileGetOptions(Oid foreigntableid, * Retrieve per-column generic options from pg_attribute and construct a list * of DefElems representing them. * - * At the moment we only have "force_not_null", which should be combined into - * a single DefElem listing all such columns, since that's what COPY expects. + * At the moment we only have "force_not_null", and "force_null", + * which should each be combined into a single DefElem listing all such + * columns, since that's what COPY expects. */ static List * get_file_fdw_attribute_options(Oid relid) @@ -380,6 +408,9 @@ get_file_fdw_attribute_options(Oid relid) AttrNumber natts; AttrNumber attnum; List *fnncolumns = NIL; + List *fncolumns = NIL; + + List *options = NIL; rel = heap_open(relid, AccessShareLock); tupleDesc = RelationGetDescr(rel); @@ -410,17 +441,29 @@ get_file_fdw_attribute_options(Oid relid) fnncolumns = lappend(fnncolumns, makeString(attname)); } } + else if (strcmp(def->defname, "force_null") == 0) + { + if (defGetBoolean(def)) + { + char *attname = pstrdup(NameStr(attr->attname)); + + fncolumns = lappend(fncolumns, makeString(attname)); + } + } /* maybe in future handle other options here */ } } heap_close(rel, AccessShareLock); - /* Return DefElem only when some column(s) have force_not_null */ + /* Return DefElem only when some column(s) have force_not_null / force_null options set */ if (fnncolumns != NIL) - return list_make1(makeDefElem("force_not_null", (Node *) fnncolumns)); - else - return NIL; + options = lappend(options, makeDefElem("force_not_null", (Node *) fnncolumns)); + + if (fncolumns != NIL) + options = lappend(options,makeDefElem("force_null", (Node *) fncolumns)); + + return options; } /* diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index f7fd28d44d..0c278aac80 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -81,11 +81,14 @@ OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', deli -- per-column options tests CREATE FOREIGN TABLE text_csv ( word1 text OPTIONS (force_not_null 'true'), - word2 text OPTIONS (force_not_null 'off') + word2 text OPTIONS (force_not_null 'off'), + word3 text OPTIONS (force_null 'true'), + word4 text OPTIONS (force_null 'off') ) SERVER file_server OPTIONS (format 'text', filename '@abs_srcdir@/data/text.csv', null 'NULL'); SELECT * FROM text_csv; -- ERROR ALTER FOREIGN TABLE text_csv OPTIONS (SET format 'csv'); +\pset null _null_ SELECT * FROM text_csv; -- force_not_null is not allowed to be specified at any foreign object level: @@ -94,6 +97,18 @@ ALTER SERVER file_server OPTIONS (ADD force_not_null '*'); -- ERROR CREATE USER MAPPING FOR public SERVER file_server OPTIONS (force_not_null '*'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR +-- force_not_null cannot be specified together with force_null +ALTER FOREIGN TABLE text_csv ALTER COLUMN word1 OPTIONS (force_null 'true'); --ERROR + +-- force_null is not allowed to be specified at any foreign object level: +ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_null '*'); -- ERROR +ALTER SERVER file_server OPTIONS (ADD force_null '*'); -- ERROR +CREATE USER MAPPING FOR public SERVER file_server OPTIONS (force_null '*'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_null '*'); -- ERROR + +-- force_null cannot be specified together with force_not_null +ALTER FOREIGN TABLE text_csv ALTER COLUMN word3 OPTIONS (force_not_null 'true'); --ERROR + -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; SELECT * FROM agg_csv ORDER BY a; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 4f90baebd6..2bec160887 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -96,20 +96,24 @@ OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', deli -- per-column options tests CREATE FOREIGN TABLE text_csv ( word1 text OPTIONS (force_not_null 'true'), - word2 text OPTIONS (force_not_null 'off') + word2 text OPTIONS (force_not_null 'off'), + word3 text OPTIONS (force_null 'true'), + word4 text OPTIONS (force_null 'off') ) SERVER file_server OPTIONS (format 'text', filename '@abs_srcdir@/data/text.csv', null 'NULL'); SELECT * FROM text_csv; -- ERROR ERROR: COPY force not null available only in CSV mode ALTER FOREIGN TABLE text_csv OPTIONS (SET format 'csv'); +\pset null _null_ SELECT * FROM text_csv; - word1 | word2 --------+------- - AAA | aaa - XYZ | xyz - NULL | - ABC | abc -(4 rows) + word1 | word2 | word3 | word4 +-------+--------+--------+-------- + AAA | aaa | 123 | + XYZ | xyz | | 321 + NULL | _null_ | _null_ | _null_ + NULL | _null_ | _null_ | _null_ + ABC | abc | | +(5 rows) -- force_not_null is not allowed to be specified at any foreign object level: ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_not_null '*'); -- ERROR @@ -124,6 +128,27 @@ HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR ERROR: invalid option "force_not_null" HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +-- force_not_null cannot be specified together with force_null +ALTER FOREIGN TABLE text_csv ALTER COLUMN word1 OPTIONS (force_null 'true'); --ERROR +ERROR: conflicting or redundant options +HINT: option "force_null" cannot be used together with "force_not_null" +-- force_null is not allowed to be specified at any foreign object level: +ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_null '*'); -- ERROR +ERROR: invalid option "force_null" +HINT: There are no valid options in this context. +ALTER SERVER file_server OPTIONS (ADD force_null '*'); -- ERROR +ERROR: invalid option "force_null" +HINT: There are no valid options in this context. +CREATE USER MAPPING FOR public SERVER file_server OPTIONS (force_null '*'); -- ERROR +ERROR: invalid option "force_null" +HINT: There are no valid options in this context. +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_null '*'); -- ERROR +ERROR: invalid option "force_null" +HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +-- force_null cannot be specified together with force_not_null +ALTER FOREIGN TABLE text_csv ALTER COLUMN word3 OPTIONS (force_not_null 'true'); --ERROR +ERROR: conflicting or redundant options +HINT: option "force_not_null" cannot be used together with "force_null" -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; a | b diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 9385b26d34..d3b39aa120 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -112,11 +112,11 @@ - Note that while COPY allows options such as OIDS and HEADER + Note that while COPY allows options such as OIDS and HEADER to be specified without a corresponding value, the foreign data wrapper - syntax requires a value to be present in all cases. To activate + syntax requires a value to be present in all cases. To activate COPY options normally supplied without a value, you can - instead pass the value TRUE. + instead pass the value TRUE. @@ -140,6 +140,21 @@ + + force_null + + + + This is a Boolean option. If true, it specifies that values of the + column which match the null string are returned as NULL + even if the value is quoted. Without this option, only unquoted + values matching the null string are returned as NULL. + This has the same effect as listing the column in + COPY's FORCE_NULL option. + + + + diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 99f246af17..5be3514612 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -42,6 +42,7 @@ COPY { table_name [ ( escape_character' FORCE_QUOTE { ( column_name [, ...] ) | * } FORCE_NOT_NULL ( column_name [, ...] ) + FORCE_NULL ( column_name [, ...] ) ENCODING 'encoding_name' @@ -328,6 +329,20 @@ COPY { table_name [ ( + + FORCE_NULL + + + Match the specified columns' values against the null string, even + if it has been quoted, and if a match is found set the value to + NULL. In the default case where the null string is empty, + this converts a quoted empty string into NULL. + This option is allowed only in COPY FROM, and only when + using CSV format. + + + + ENCODING @@ -637,7 +652,9 @@ COPY count string, while an empty string data value is written with double quotes (""). Reading values follows similar rules. You can use FORCE_NOT_NULL to prevent NULL input - comparisons for specific columns. + comparisons for specific columns. You can also use + FORCE_NULL to convert quoted null string data values to + NULL. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 7c4039cb7f..70ee7e5048 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -125,6 +125,8 @@ typedef struct CopyStateData bool *force_quote_flags; /* per-column CSV FQ flags */ List *force_notnull; /* list of column names */ bool *force_notnull_flags; /* per-column CSV FNN flags */ + List *force_null; /* list of column names */ + bool *force_null_flags; /* per-column CSV FN flags */ bool convert_selectively; /* do selective binary conversion? */ List *convert_select; /* list of column names (can be NIL) */ bool *convert_select_flags; /* per-column CSV/TEXT CS flags */ @@ -1019,6 +1021,20 @@ ProcessCopyOptions(CopyState cstate, errmsg("argument to option \"%s\" must be a list of column names", defel->defname))); } + else if (strcmp(defel->defname, "force_null") == 0) + { + if (cstate->force_null) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + if (defel->arg && IsA(defel->arg, List)) + cstate->force_null = (List *) defel->arg; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname))); + } else if (strcmp(defel->defname, "convert_selectively") == 0) { /* @@ -1178,6 +1194,17 @@ ProcessCopyOptions(CopyState cstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force not null only available using COPY FROM"))); + /* Check force_null */ + if (!cstate->csv_mode && cstate->force_null != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null available only in CSV mode"))); + + if (cstate->force_null != NIL && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null only available using COPY FROM"))); + /* Don't allow the delimiter to appear in the null string. */ if (strchr(cstate->null_print, cstate->delim[0]) != NULL) ereport(ERROR, @@ -1385,6 +1412,28 @@ BeginCopy(bool is_from, } } + /* Convert FORCE NULL name list to per-column flags, check validity */ + cstate->force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->force_null) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_null); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE NULL column \"%s\" not referenced by COPY", + NameStr(tupDesc->attrs[attnum - 1]->attname)))); + cstate->force_null_flags[attnum - 1] = true; + } + } + /* Convert convert_selectively name list to per-column flags */ if (cstate->convert_selectively) { @@ -2810,11 +2859,28 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext, continue; } - if (cstate->csv_mode && string == NULL && - cstate->force_notnull_flags[m]) + if (cstate->csv_mode) { - /* Go ahead and read the NULL string */ - string = cstate->null_print; + if(string == NULL && + cstate->force_notnull_flags[m]) + { + /* + * FORCE_NOT_NULL option is set and column is NULL - + * convert it to the NULL string. + */ + string = cstate->null_print; + } + else if(string != NULL && cstate->force_null_flags[m] + && strcmp(string,cstate->null_print) == 0 ) + { + /* + * FORCE_NULL option is set and column matches the NULL string. + * It must have been quoted, or otherwise the string would already + * have been set to NULL. + * Convert it to NULL as specified. + */ + string = NULL; + } } cstate->cur_attname = NameStr(attr[m]->attname); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 81169a4896..e3060a4dff 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2548,6 +2548,10 @@ copy_opt_item: { $$ = makeDefElem("force_not_null", (Node *)$4); } + | FORCE NULL_P columnList + { + $$ = makeDefElem("force_null", (Node *)$3); + } | ENCODING Sconst { $$ = makeDefElem("encoding", (Node *)makeString($2)); diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 34fa131c52..76dea287e2 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -382,6 +382,54 @@ SELECT * FROM vistest; e (2 rows) +-- Test FORCE_NOT_NULL and FORCE_NULL options +-- should succeed with "b" set to an empty string and "c" set to NULL +CREATE TEMP TABLE forcetest ( + a INT NOT NULL, + b TEXT NOT NULL, + c TEXT, + d TEXT, + e TEXT +); +\pset null NULL +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 1; + b | c +---+------ + | NULL +(1 row) + +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +COMMIT; +SELECT b, c FROM forcetest WHERE a = 2; + b | c +---+------ + | NULL +(1 row) + +-- should fail with not-null constraint violation +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b), FORCE_NOT_NULL(c)); +ERROR: null value in column "b" violates not-null constraint +DETAIL: Failing row contains (3, null, , null, null). +CONTEXT: COPY forcetest, line 1: "3,,""" +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b)); +ERROR: FORCE NOT NULL column "b" not referenced by COPY +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b)); +ERROR: FORCE NULL column "b" not referenced by COPY +ROLLBACK; +\pset null '' +DROP TABLE forcetest; DROP TABLE vistest; DROP FUNCTION truncate_in_subxact(); DROP TABLE x, y; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index c46128b38a..e2be21f526 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -270,6 +270,45 @@ e SELECT * FROM vistest; COMMIT; SELECT * FROM vistest; +-- Test FORCE_NOT_NULL and FORCE_NULL options +-- should succeed with "b" set to an empty string and "c" set to NULL +CREATE TEMP TABLE forcetest ( + a INT NOT NULL, + b TEXT NOT NULL, + c TEXT, + d TEXT, + e TEXT +); +\pset null NULL +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +1,,"" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 1; +-- should succeed with no effect ("b" remains an empty string, "c" remains NULL) +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c)); +2,,"" +\. +COMMIT; +SELECT b, c FROM forcetest WHERE a = 2; +-- should fail with not-null constraint violation +BEGIN; +COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b), FORCE_NOT_NULL(c)); +3,,"" +\. +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b)); +ROLLBACK; +-- should fail with "not referenced by COPY" error +BEGIN; +COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b)); +ROLLBACK; +\pset null '' +DROP TABLE forcetest; DROP TABLE vistest; DROP FUNCTION truncate_in_subxact(); DROP TABLE x, y; -- 2.40.0