From 03db44eae3e065174d5e29888b8d98c39b1f520b Mon Sep 17 00:00:00 2001 From: Itagaki Takahiro Date: Thu, 16 Dec 2010 06:56:28 +0900 Subject: [PATCH] Add pg_read_binary_file() and whole-file-at-once versions of pg_read_file(). One of the usages of the binary version is to read files in a different encoding from the server encoding. Dimitri Fontaine and Itagaki Takahiro. --- doc/src/sgml/func.sgml | 25 +++++- src/backend/utils/adt/genfile.c | 128 +++++++++++++++++++++++++------ src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.h | 6 ++ src/include/utils/builtins.h | 3 + 5 files changed, 139 insertions(+), 25 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 21f1ddfa50..7c1ba9d07f 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14449,11 +14449,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); - pg_read_file(filename text, offset bigint, length bigint) + pg_read_file(filename text [, offset bigint, length bigint]) text Return the contents of a text file + + + pg_read_binary_file(filename text [, offset bigint, length bigint]) + + bytea + Return the contents of a file + pg_stat_file(filename text) @@ -14482,6 +14489,22 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); at the given offset, returning at most length bytes (less if the end of file is reached first). If offset is negative, it is relative to the end of the file. + When offset and length parameters are omitted, + it returns the whole of the file. + The part of the file that is read must be valid text in the server encoding. + + + + pg_read_binary_file + + + pg_read_binary_file returns part of a file in the same way as + pg_read_file, but the result is a bytea value. 
+ One of the usages is to read a file in the specified encoding combined with + convert_from function: + +SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); + diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c index e8a36edcd4..e9212500c2 100644 --- a/src/backend/utils/adt/genfile.c +++ b/src/backend/utils/adt/genfile.c @@ -80,15 +80,14 @@ convert_and_check_filename(text *arg) /* - * Read a section of a file, returning it as text + * Read a section of a file, returning it as bytea + * + * We read the whole of the file when bytes_to_read is negative. */ -Datum -pg_read_file(PG_FUNCTION_ARGS) +static bytea * +read_binary_file(text *filename_t, int64 seek_offset, int64 bytes_to_read) { - text *filename_t = PG_GETARG_TEXT_P(0); - int64 seek_offset = PG_GETARG_INT64(1); - int64 bytes_to_read = PG_GETARG_INT64(2); - char *buf; + bytea *buf; size_t nbytes; FILE *file; char *filename; @@ -100,6 +99,29 @@ pg_read_file(PG_FUNCTION_ARGS) filename = convert_and_check_filename(filename_t); + if (bytes_to_read < 0) + { + if (seek_offset < 0) + bytes_to_read = -seek_offset; + else + { + struct stat fst; + + if (stat(filename, &fst) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", filename))); + + bytes_to_read = fst.st_size - seek_offset; + } + } + + /* not sure why anyone thought that int64 length was a good idea */ + if (bytes_to_read > (MaxAllocSize - VARHDRSZ)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("requested length too large"))); + if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL) ereport(ERROR, (errcode_for_file_access(), @@ -112,18 +134,7 @@ pg_read_file(PG_FUNCTION_ARGS) (errcode_for_file_access(), errmsg("could not seek in file \"%s\": %m", filename))); - if (bytes_to_read < 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("requested length cannot be negative"))); - - /* not sure why anyone thought that int64 
length was a good idea */ - if (bytes_to_read > (MaxAllocSize - VARHDRSZ)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("requested length too large"))); - - buf = palloc((Size) bytes_to_read + VARHDRSZ); + buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ); nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file); @@ -132,15 +143,86 @@ pg_read_file(PG_FUNCTION_ARGS) (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", filename))); - /* Make sure the input is valid */ - pg_verifymbstr(VARDATA(buf), nbytes, false); - SET_VARSIZE(buf, nbytes + VARHDRSZ); FreeFile(file); pfree(filename); - PG_RETURN_TEXT_P(buf); + return buf; +} + +/* + * In addition to read_binary_file, verify whether the contents are encoded + * in the database encoding. + */ +static text * +read_text_file(text *filename, int64 seek_offset, int64 bytes_to_read) +{ + bytea *buf = read_binary_file(filename, seek_offset, bytes_to_read); + + /* Make sure the input is valid */ + pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false); + + /* OK, we can cast it as text safely */ + return (text *) buf; +} + +/* + * Read a section of a file, returning it as text + */ +Datum +pg_read_file(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_P(0); + int64 seek_offset = PG_GETARG_INT64(1); + int64 bytes_to_read = PG_GETARG_INT64(2); + + if (bytes_to_read < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("requested length cannot be negative"))); + + PG_RETURN_TEXT_P(read_text_file(filename_t, seek_offset, bytes_to_read)); +} + +/* + * Read the whole of a file, returning it as text + */ +Datum +pg_read_file_all(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_P(0); + + PG_RETURN_TEXT_P(read_text_file(filename_t, 0, -1)); +} + +/* + * Read a section of a file, returning it as bytea + */ +Datum +pg_read_binary_file(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_P(0); + int64 seek_offset = PG_GETARG_INT64(1); + 
int64 bytes_to_read = PG_GETARG_INT64(2); + + if (bytes_to_read < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("requested length cannot be negative"))); + + PG_RETURN_BYTEA_P(read_binary_file(filename_t, seek_offset, bytes_to_read)); +} + +/* + * Read the whole of a file, returning it as bytea + */ +Datum +pg_read_binary_file_all(PG_FUNCTION_ARGS) +{ + text *filename_t = PG_GETARG_TEXT_P(0); + + PG_RETURN_BYTEA_P(read_binary_file(filename_t, 0, -1)); } /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 1c3d14951c..1ebd7a2d27 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201012131 +#define CATALOG_VERSION_NO 201012161 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index feae22e896..1e6e75f5c3 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3403,6 +3403,12 @@ DATA(insert OID = 2623 ( pg_stat_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2249 DESCR("return file information"); DATA(insert OID = 2624 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 25 "25 20 20" _null_ _null_ _null_ _null_ pg_read_file _null_ _null_ _null_ )); DESCR("read text from a file"); +DATA(insert OID = 3826 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_read_file_all _null_ _null_ _null_ )); +DESCR("read text from a file"); +DATA(insert OID = 3827 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 17 "25 20 20" _null_ _null_ _null_ _null_ pg_read_binary_file _null_ _null_ _null_ )); +DESCR("read bytea from a file"); +DATA(insert OID = 3828 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 17 "25" _null_ _null_ _null_ _null_ pg_read_binary_file_all _null_ _null_ _null_ )); +DESCR("read bytea from a file"); DATA(insert OID = 2625 ( pg_ls_dir PGNSP PGUID 12 1 1000 0 f f f t t v 1 0 25 "25" _null_ 
_null_ _null_ _null_ pg_ls_dir _null_ _null_ _null_ )); DESCR("list all files in a directory"); DATA(insert OID = 2626 ( pg_sleep PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "701" _null_ _null_ _null_ _null_ pg_sleep _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index a2fb7494cb..1888e312f8 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -442,6 +442,9 @@ extern Datum pg_relation_filepath(PG_FUNCTION_ARGS); /* genfile.c */ extern Datum pg_stat_file(PG_FUNCTION_ARGS); extern Datum pg_read_file(PG_FUNCTION_ARGS); +extern Datum pg_read_file_all(PG_FUNCTION_ARGS); +extern Datum pg_read_binary_file(PG_FUNCTION_ARGS); +extern Datum pg_read_binary_file_all(PG_FUNCTION_ARGS); extern Datum pg_ls_dir(PG_FUNCTION_ARGS); /* misc.c */ -- 2.40.0