1 /*-------------------------------------------------------------------------
4 * Implements the COPY utility command.
6 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.251 2005/09/24 22:54:36 tgl Exp $
13 *-------------------------------------------------------------------------
20 #include <netinet/in.h>
21 #include <arpa/inet.h>
23 #include "access/genam.h"
24 #include "access/heapam.h"
25 #include "access/printtup.h"
26 #include "catalog/index.h"
27 #include "catalog/namespace.h"
28 #include "catalog/pg_index.h"
29 #include "catalog/pg_type.h"
30 #include "commands/copy.h"
31 #include "commands/trigger.h"
32 #include "executor/executor.h"
33 #include "libpq/libpq.h"
34 #include "libpq/pqformat.h"
35 #include "mb/pg_wchar.h"
36 #include "miscadmin.h"
37 #include "nodes/makefuncs.h"
38 #include "parser/parse_coerce.h"
39 #include "parser/parse_relation.h"
40 #include "rewrite/rewriteHandler.h"
41 #include "storage/fd.h"
42 #include "tcop/pquery.h"
43 #include "tcop/tcopprot.h"
44 #include "utils/acl.h"
45 #include "utils/builtins.h"
46 #include "utils/lsyscache.h"
47 #include "utils/memutils.h"
48 #include "utils/relcache.h"
49 #include "utils/syscache.h"
/*
 * Octal-digit character helpers.
 *
 * ISOCTAL(c)  - true iff c is one of the characters '0' .. '7'.  The
 *               argument is evaluated exactly once, so passing an expression
 *               with side effects (such as *ptr++) is safe.  Characters
 *               below '0' yield a negative difference, which becomes a huge
 *               unsigned value and therefore fails the <= 7 test.
 * OCTVALUE(c) - numeric value (0 .. 7) of an octal digit character.
 */
#define ISOCTAL(c) (((unsigned int) ((c) - '0')) <= 7U)
#define OCTVALUE(c) ((c) - '0')
56 * Represents the different source/dest cases we need to worry about at
61 COPY_FILE, /* to/from file */
62 COPY_OLD_FE, /* to/from frontend (2.0 protocol) */
63 COPY_NEW_FE /* to/from frontend (3.0 protocol) */
67 * Represents the end-of-line terminator type of the input
78 * This struct contains all the state variables used throughout a COPY
79 * operation. For simplicity, we use the same struct for all variants
80 * of COPY, even though some fields are used in only some cases.
82 * A word about encoding considerations: encodings that are only supported on
83 * the client side are those where multibyte characters may have second or
84 * later bytes with the high bit not set. When scanning data in such an
85 * encoding to look for a match to a single-byte (ie ASCII) character,
86 * we must use the full pg_encoding_mblen() machinery to skip over
87 * multibyte characters, else we might find a false match to a trailing
88 * byte. In supported server encodings, there is no possibility of
89 * a false match, and it's faster to make useless comparisons to trailing
90 * bytes than it is to invoke pg_encoding_mblen() to skip over them.
91 * client_only_encoding is TRUE when we have to do it the hard way.
93 typedef struct CopyStateData
95 /* low-level state data */
96 CopyDest copy_dest; /* type of copy source/destination */
97 FILE *copy_file; /* used if copy_dest == COPY_FILE */
98 StringInfo fe_msgbuf; /* used if copy_dest == COPY_NEW_FE */
99 bool fe_copy; /* true for all FE copy dests */
100 bool fe_eof; /* true if detected end of copy data */
101 EolType eol_type; /* EOL type of input */
102 int client_encoding; /* remote side's character encoding */
103 bool need_transcoding; /* client encoding diff from server? */
104 bool client_only_encoding; /* encoding not valid on server? */
106 /* parameters from the COPY command */
107 Relation rel; /* relation to copy to or from */
108 List *attnumlist; /* integer list of attnums to copy */
109 bool binary; /* binary format? */
110 bool oids; /* include OIDs? */
111 bool csv_mode; /* Comma Separated Value format? */
112 bool header_line; /* CSV header line? */
113 char *null_print; /* NULL marker string (server encoding!) */
114 int null_print_len; /* length of same */
115 char *delim; /* column delimiter (must be 1 byte) */
116 char *quote; /* CSV quote char (must be 1 byte) */
117 char *escape; /* CSV escape char (must be 1 byte) */
118 List *force_quote_atts; /* integer list of attnums to FQ */
119 List *force_notnull_atts; /* integer list of attnums to FNN */
121 /* these are just for error messages, see copy_in_error_callback */
122 const char *cur_relname; /* table name for error messages */
123 int cur_lineno; /* line number for error messages */
124 const char *cur_attname; /* current att for error messages */
125 const char *cur_attval; /* current att value for error messages */
128 * These variables are used to reduce overhead in textual COPY FROM.
130 * attribute_buf holds the separated, de-escaped text for each field of
131 * the current line. The CopyReadAttributes functions return arrays of
132 * pointers into this buffer. We avoid palloc/pfree overhead by re-using
133 * the buffer on each cycle.
135 StringInfoData attribute_buf;
138 * Similarly, line_buf holds the whole input line being processed.
139 * The input cycle is first to read the whole line into line_buf,
140 * convert it to server encoding there, and then extract the individual
141 * attribute fields into attribute_buf. line_buf is preserved unmodified
142 * so that we can display it in error messages if appropriate.
144 StringInfoData line_buf;
145 bool line_buf_converted; /* converted to server encoding? */
148 * Finally, raw_buf holds raw data read from the data source (file or
149 * client connection). CopyReadLine parses this data sufficiently to
150 * locate line boundaries, then transfers the data to line_buf and
151 * converts it. Note: we guarantee that there is a \0 at
152 * raw_buf[raw_buf_len].
154 #define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
156 int raw_buf_index; /* next byte to process */
157 int raw_buf_len; /* total # of bytes stored */
160 typedef CopyStateData *CopyState;
163 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
166 /* non-export function prototypes */
167 static void DoCopyTo(CopyState cstate);
168 static void CopyTo(CopyState cstate);
169 static void CopyFrom(CopyState cstate);
170 static bool CopyReadLine(CopyState cstate);
171 static bool CopyReadLineText(CopyState cstate);
172 static bool CopyReadLineCSV(CopyState cstate);
173 static int CopyReadAttributesText(CopyState cstate, int maxfields,
175 static int CopyReadAttributesCSV(CopyState cstate, int maxfields,
177 static Datum CopyReadBinaryAttribute(CopyState cstate,
178 int column_no, FmgrInfo *flinfo,
179 Oid typioparam, int32 typmod,
181 static void CopyAttributeOutText(CopyState cstate, char *server_string);
182 static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
184 static List *CopyGetAttnums(Relation rel, List *attnamelist);
185 static char *limit_printout_length(const char *str);
187 /* Low-level communications functions */
188 static void SendCopyBegin(CopyState cstate);
189 static void ReceiveCopyBegin(CopyState cstate);
190 static void SendCopyEnd(CopyState cstate);
191 static void CopySendData(CopyState cstate, void *databuf, int datasize);
192 static void CopySendString(CopyState cstate, const char *str);
193 static void CopySendChar(CopyState cstate, char c);
194 static void CopySendEndOfRow(CopyState cstate);
195 static int CopyGetData(CopyState cstate, void *databuf,
196 int minread, int maxread);
197 static void CopySendInt32(CopyState cstate, int32 val);
198 static bool CopyGetInt32(CopyState cstate, int32 *val);
199 static void CopySendInt16(CopyState cstate, int16 val);
200 static bool CopyGetInt16(CopyState cstate, int16 *val);
204 * Send copy start/stop messages for frontend copies. These have changed
205 * in past protocol redesigns.
/*
 * SendCopyBegin: announce the start of a COPY OUT to the frontend and pick
 * the matching cstate->copy_dest.
 *
 * Protocol 3 or later: builds an 'H' message holding the overall format
 * byte (1 if cstate->binary, else 0), the column count as a 2-byte integer,
 * and one 2-byte format code per column; then selects COPY_NEW_FE and
 * allocates cstate->fe_msgbuf.
 *
 * Protocol 2: sends an empty 'H' message and selects COPY_OLD_FE.
 * Anything older: sends an empty 'B' message and selects COPY_OLD_FE.
 *
 * NOTE(review): both old-protocol branches carry a "COPY BINARY is not
 * supported" error; the condition guarding it and the statement that
 * finishes the 'H' message are not visible in this excerpt.
 */
208 SendCopyBegin(CopyState cstate)
210 if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
214 int natts = list_length(cstate->attnumlist);
215 int16 format = (cstate->binary ? 1 : 0);
218 pq_beginmessage(&buf, 'H');
219 pq_sendbyte(&buf, format); /* overall format */
220 pq_sendint(&buf, natts, 2);
221 for (i = 0; i < natts; i++)
222 pq_sendint(&buf, format, 2); /* per-column formats */
224 cstate->copy_dest = COPY_NEW_FE;
225 cstate->fe_msgbuf = makeStringInfo();
227 else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
232 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
233 errmsg("COPY BINARY is not supported to stdout or from stdin")));
234 pq_putemptymessage('H');
235 /* grottiness needed for old COPY OUT protocol */
237 cstate->copy_dest = COPY_OLD_FE;
244 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
245 errmsg("COPY BINARY is not supported to stdout or from stdin")));
246 pq_putemptymessage('B');
247 /* grottiness needed for old COPY OUT protocol */
249 cstate->copy_dest = COPY_OLD_FE;
/*
 * ReceiveCopyBegin: tell the frontend that the backend is ready to accept
 * COPY IN data, and pick the matching cstate->copy_dest.
 *
 * Protocol 3 or later: builds a 'G' message holding the overall format
 * byte (1 if cstate->binary, else 0), the column count as a 2-byte integer,
 * and one 2-byte format code per column; then selects COPY_NEW_FE and
 * allocates cstate->fe_msgbuf, which CopyGetData later fills with incoming
 * messages.
 *
 * Protocol 2: sends an empty 'G' message and selects COPY_OLD_FE.
 * Anything older: sends an empty 'D' message and selects COPY_OLD_FE.
 *
 * NOTE(review): both old-protocol branches carry a "COPY BINARY is not
 * supported" error; the guarding condition and the final flush call are not
 * visible in this excerpt.
 */
254 ReceiveCopyBegin(CopyState cstate)
256 if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
260 int natts = list_length(cstate->attnumlist);
261 int16 format = (cstate->binary ? 1 : 0);
264 pq_beginmessage(&buf, 'G');
265 pq_sendbyte(&buf, format); /* overall format */
266 pq_sendint(&buf, natts, 2);
267 for (i = 0; i < natts; i++)
268 pq_sendint(&buf, format, 2); /* per-column formats */
270 cstate->copy_dest = COPY_NEW_FE;
271 cstate->fe_msgbuf = makeStringInfo();
273 else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
278 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
279 errmsg("COPY BINARY is not supported to stdout or from stdin")));
280 pq_putemptymessage('G');
281 cstate->copy_dest = COPY_OLD_FE;
288 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
289 errmsg("COPY BINARY is not supported to stdout or from stdin")));
290 pq_putemptymessage('D');
291 cstate->copy_dest = COPY_OLD_FE;
293 /* We *must* flush here to ensure FE knows it can send. */
/*
 * SendCopyEnd: finish a COPY OUT to the frontend.
 *
 * For COPY_NEW_FE, any pending output is pushed out with CopySendEndOfRow
 * (or fe_msgbuf is asserted to be empty), and then an empty 'c' message is
 * sent.  Otherwise the three bytes backslash, period, newline are written
 * and old-style copy-out mode is ended with pq_endcopyout(false).
 *
 * NOTE(review): the test that chooses between the flush and the Assert is
 * not visible in this excerpt.
 */
298 SendCopyEnd(CopyState cstate)
300 if (cstate->copy_dest == COPY_NEW_FE)
304 /* Need to flush out file trailer word */
305 CopySendEndOfRow(cstate);
309 /* Shouldn't have any unsent data */
310 Assert(cstate->fe_msgbuf->len == 0);
312 /* Send Copy Done message */
313 pq_putemptymessage('c');
317 /* The FE/BE protocol uses \n as newline for all platforms */
318 CopySendData(cstate, "\\.\n", 3);
319 pq_endcopyout(false);
324 * CopySendData sends output data to the destination (file or frontend)
325 * CopySendString does the same for null-terminated strings
326 * CopySendChar does the same for single characters
327 * CopySendEndOfRow does the appropriate thing at end of each data row
329 * NB: no data conversion is applied by these functions
/*
 * CopySendData: write datasize bytes from databuf to the destination
 * selected by cstate->copy_dest.
 *
 * Three delivery paths are visible below:
 *	- fwrite() to cstate->copy_file, followed by an ferror() check that
 *	  reports "could not write to COPY file";
 *	- pq_putbytes() straight to the client, where a failure is reported as
 *	  a lost connection;
 *	- appendBinaryStringInfo() into cstate->fe_msgbuf, which only
 *	  accumulates the bytes (CopySendEndOfRow transmits them later).
 *
 * NOTE(review): the case labels tying each path to a CopyDest value are not
 * visible in this excerpt.
 */
333 CopySendData(CopyState cstate, void *databuf, int datasize)
335 switch (cstate->copy_dest)
338 fwrite(databuf, datasize, 1, cstate->copy_file);
339 if (ferror(cstate->copy_file))
341 (errcode_for_file_access(),
342 errmsg("could not write to COPY file: %m")));
345 if (pq_putbytes((char *) databuf, datasize))
347 /* no hope of recovering connection sync, so FATAL */
349 (errcode(ERRCODE_CONNECTION_FAILURE),
350 errmsg("connection lost during COPY to stdout")));
354 appendBinaryStringInfo(cstate->fe_msgbuf,
355 (char *) databuf, datasize);
361 CopySendString(CopyState cstate, const char *str)
363 CopySendData(cstate, (void *) str, strlen(str));
367 CopySendChar(CopyState cstate, char c)
369 CopySendData(cstate, &c, 1);
/*
 * CopySendEndOfRow: terminate the current output row in the way the
 * destination (cstate->copy_dest) requires.
 *
 * File output ends the row with a platform-dependent terminator, either
 * "\n" or "\r\n".  Both frontend paths always use "\n".  On the path that
 * buffers into cstate->fe_msgbuf, the accumulated row is then shipped as a
 * single 'd' message and the buffer is reset to empty (length 0, leading
 * NUL) so it can be reused for the next row.
 *
 * NOTE(review): the case labels, the platform #ifdef, and any binary-mode
 * guards around the newline writes are not visible in this excerpt.
 */
373 CopySendEndOfRow(CopyState cstate)
375 switch (cstate->copy_dest)
380 /* Default line termination depends on platform */
382 CopySendChar(cstate, '\n');
384 CopySendString(cstate, "\r\n");
389 /* The FE/BE protocol uses \n as newline for all platforms */
391 CopySendChar(cstate, '\n');
394 /* The FE/BE protocol uses \n as newline for all platforms */
396 CopySendChar(cstate, '\n');
397 /* Dump the accumulated row as one CopyData message */
398 (void) pq_putmessage('d', cstate->fe_msgbuf->data,
399 cstate->fe_msgbuf->len);
400 /* Reset fe_msgbuf to empty */
401 cstate->fe_msgbuf->len = 0;
402 cstate->fe_msgbuf->data[0] = '\0';
408 * CopyGetData reads data from the source (file or frontend)
410 * We attempt to read at least minread, and at most maxread, bytes from
411 * the source. The actual number of bytes read is returned; if this is
412 * less than minread, EOF was detected.
414 * Note: when copying from the frontend, we expect a proper EOF mark per
415 * protocol; if the frontend simply drops the connection, we raise error.
416 * It seems unwise to allow the COPY IN to complete normally in that case.
418 * NB: no data conversion is applied here.
/*
 * CopyGetData: read raw bytes from the source selected by
 * cstate->copy_dest into databuf.
 *
 * Three paths are visible below:
 *	- file: a single fread() of up to maxread bytes, with an ferror() check;
 *	- old frontend protocol: pq_getbytes() for exactly minread bytes, where
 *	  failure is reported as an unexpected EOF;
 *	- message-based frontend protocol: loops until minread bytes have been
 *	  gathered or fe_eof is set.  Whenever fe_msgbuf is exhausted, another
 *	  message is fetched with pq_getbyte()/pq_getmessage().  'd' carries
 *	  data, 'c' sets cstate->fe_eof, 'f' raises a "COPY from stdin failed"
 *	  error quoting the client's text, 'H' is one of the tolerated
 *	  Flush/Sync messages, and other types are reported as a protocol
 *	  violation.  Available bytes are copied out with pq_copymsgbytes() and
 *	  databuf is advanced past them.
 *
 * NOTE(review): the case labels, the bookkeeping that updates
 * bytesread/maxread, and the return statement are not visible in this
 * excerpt.
 */
421 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
425 switch (cstate->copy_dest)
428 bytesread = fread(databuf, 1, maxread, cstate->copy_file);
429 if (ferror(cstate->copy_file))
431 (errcode_for_file_access(),
432 errmsg("could not read from COPY file: %m")));
436 * We cannot read more than minread bytes (which in practice is 1)
437 * because old protocol doesn't have any clear way of separating
438 * the COPY stream from following data. This is slow, but not
439 * any slower than the code path was originally, and we don't
440 * care much anymore about the performance of old protocol.
442 if (pq_getbytes((char *) databuf, minread))
444 /* Only a \. terminator is legal EOF in old protocol */
446 (errcode(ERRCODE_CONNECTION_FAILURE),
447 errmsg("unexpected EOF on client connection")));
452 while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
456 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
458 /* Try to receive another message */
462 mtype = pq_getbyte();
465 (errcode(ERRCODE_CONNECTION_FAILURE),
466 errmsg("unexpected EOF on client connection")));
467 if (pq_getmessage(cstate->fe_msgbuf, 0))
469 (errcode(ERRCODE_CONNECTION_FAILURE),
470 errmsg("unexpected EOF on client connection")));
473 case 'd': /* CopyData */
475 case 'c': /* CopyDone */
476 /* COPY IN correctly terminated by frontend */
477 cstate->fe_eof = true;
479 case 'f': /* CopyFail */
481 (errcode(ERRCODE_QUERY_CANCELED),
482 errmsg("COPY from stdin failed: %s",
483 pq_getmsgstring(cstate->fe_msgbuf))));
485 case 'H': /* Flush */
489 * Ignore Flush/Sync for the convenience of
490 * client libraries (such as libpq) that may
491 * send those without noticing that the
492 * command they just sent was COPY.
497 (errcode(ERRCODE_PROTOCOL_VIOLATION),
498 errmsg("unexpected message type 0x%02X during COPY from stdin",
503 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
506 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
507 databuf = (void *) ((char *) databuf + avail);
519 * These functions do apply some data conversion
523 * CopySendInt32 sends an int32 in network byte order
526 CopySendInt32(CopyState cstate, int32 val)
530 buf = htonl((uint32) val);
531 CopySendData(cstate, &buf, sizeof(buf));
535 * CopyGetInt32 reads an int32 that appears in network byte order
537 * Returns true if OK, false if EOF
540 CopyGetInt32(CopyState cstate, int32 *val)
544 if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
546 *val = 0; /* suppress compiler warning */
549 *val = (int32) ntohl(buf);
554 * CopySendInt16 sends an int16 in network byte order
557 CopySendInt16(CopyState cstate, int16 val)
561 buf = htons((uint16) val);
562 CopySendData(cstate, &buf, sizeof(buf));
566 * CopyGetInt16 reads an int16 that appears in network byte order
569 CopyGetInt16(CopyState cstate, int16 *val)
573 if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
575 *val = 0; /* suppress compiler warning */
578 *val = (int16) ntohs(buf);
584 * CopyLoadRawBuf loads some more data into raw_buf
586 * Returns TRUE if able to obtain at least one more byte, else FALSE.
588 * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
589 * down to the start of the buffer and then we load more data after that.
590 * This case is used only when a frontend multibyte character crosses a
591 * bufferload boundary.
/*
 * CopyLoadRawBuf: refill cstate->raw_buf from the data source.
 *
 * Any bytes not yet consumed (raw_buf_index < raw_buf_len) are first moved
 * to the front of the buffer with memmove(); nbytes counts them.  New data
 * is then requested from CopyGetData directly behind the kept bytes, asking
 * for at least 1 and at most RAW_BUF_SIZE - nbytes bytes, so the buffer
 * cannot be overrun.  Afterwards the buffer is NUL-terminated, the read
 * index is reset to 0 and raw_buf_len is updated.
 *
 * The return value is true only if CopyGetData delivered new bytes
 * (inbytes > 0).
 *
 * NOTE(review): the step that adds inbytes to nbytes before the terminator
 * is written is not visible in this excerpt; confirm it against the full
 * source.
 */
594 CopyLoadRawBuf(CopyState cstate)
599 if (cstate->raw_buf_index < cstate->raw_buf_len)
601 /* Copy down the unprocessed data */
602 nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
603 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
607 nbytes = 0; /* no data need be saved */
609 inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
610 1, RAW_BUF_SIZE - nbytes);
612 cstate->raw_buf[nbytes] = '\0';
613 cstate->raw_buf_index = 0;
614 cstate->raw_buf_len = nbytes;
615 return (inbytes > 0);
620 * DoCopy executes the SQL COPY statement.
622 * Either unload or reload contents of table <relation>, depending on <from>.
623 * (<from> = TRUE means we are inserting into the table.)
625 * If <pipe> is false, transfer is between the table and the file named
626 * <filename>. Otherwise, transfer is between the table and our regular
627 * input/output stream. The latter could be either stdin/stdout or a
628 * socket, depending on whether we're running under Postmaster control.
630 * Iff <binary>, unload or reload in the binary format, as opposed to the
631 * more wasteful but more robust and portable text format.
633 * Iff <oids>, unload or reload the format that includes OID information.
634 * On input, we accept OIDs whether or not the table has an OID column,
635 * but silently drop them if it does not. On output, we report an error
636 * if the user asks for OIDs in a table that has none (not providing an
637 * OID column might seem friendlier, but could seriously confuse programs).
639 * If in the text format, delimit columns with delimiter <delim> and print
640 * NULL values as <null_print>.
642 * Do not allow a Postgres user without superuser privilege to read from
643 * or write to a file.
645 * Do not allow the copy if user doesn't have proper permission to access
649 DoCopy(const CopyStmt *stmt)
652 RangeVar *relation = stmt->relation;
653 char *filename = stmt->filename;
654 bool is_from = stmt->is_from;
655 bool pipe = (stmt->filename == NULL);
656 List *attnamelist = stmt->attlist;
657 List *force_quote = NIL;
658 List *force_notnull = NIL;
659 AclMode required_access = (is_from ? ACL_INSERT : ACL_SELECT);
663 /* Allocate workspace and zero all fields */
664 cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
666 /* Extract options from the statement node tree */
667 foreach(option, stmt->options)
669 DefElem *defel = (DefElem *) lfirst(option);
671 if (strcmp(defel->defname, "binary") == 0)
675 (errcode(ERRCODE_SYNTAX_ERROR),
676 errmsg("conflicting or redundant options")));
677 cstate->binary = intVal(defel->arg);
679 else if (strcmp(defel->defname, "oids") == 0)
683 (errcode(ERRCODE_SYNTAX_ERROR),
684 errmsg("conflicting or redundant options")));
685 cstate->oids = intVal(defel->arg);
687 else if (strcmp(defel->defname, "delimiter") == 0)
691 (errcode(ERRCODE_SYNTAX_ERROR),
692 errmsg("conflicting or redundant options")));
693 cstate->delim = strVal(defel->arg);
695 else if (strcmp(defel->defname, "null") == 0)
697 if (cstate->null_print)
699 (errcode(ERRCODE_SYNTAX_ERROR),
700 errmsg("conflicting or redundant options")));
701 cstate->null_print = strVal(defel->arg);
703 else if (strcmp(defel->defname, "csv") == 0)
705 if (cstate->csv_mode)
707 (errcode(ERRCODE_SYNTAX_ERROR),
708 errmsg("conflicting or redundant options")));
709 cstate->csv_mode = intVal(defel->arg);
711 else if (strcmp(defel->defname, "header") == 0)
713 if (cstate->header_line)
715 (errcode(ERRCODE_SYNTAX_ERROR),
716 errmsg("conflicting or redundant options")));
717 cstate->header_line = intVal(defel->arg);
719 else if (strcmp(defel->defname, "quote") == 0)
723 (errcode(ERRCODE_SYNTAX_ERROR),
724 errmsg("conflicting or redundant options")));
725 cstate->quote = strVal(defel->arg);
727 else if (strcmp(defel->defname, "escape") == 0)
731 (errcode(ERRCODE_SYNTAX_ERROR),
732 errmsg("conflicting or redundant options")));
733 cstate->escape = strVal(defel->arg);
735 else if (strcmp(defel->defname, "force_quote") == 0)
739 (errcode(ERRCODE_SYNTAX_ERROR),
740 errmsg("conflicting or redundant options")));
741 force_quote = (List *) defel->arg;
743 else if (strcmp(defel->defname, "force_notnull") == 0)
747 (errcode(ERRCODE_SYNTAX_ERROR),
748 errmsg("conflicting or redundant options")));
749 force_notnull = (List *) defel->arg;
752 elog(ERROR, "option \"%s\" not recognized",
756 /* Check for incompatible options */
757 if (cstate->binary && cstate->delim)
759 (errcode(ERRCODE_SYNTAX_ERROR),
760 errmsg("cannot specify DELIMITER in BINARY mode")));
762 if (cstate->binary && cstate->csv_mode)
764 (errcode(ERRCODE_SYNTAX_ERROR),
765 errmsg("cannot specify CSV in BINARY mode")));
767 if (cstate->binary && cstate->null_print)
769 (errcode(ERRCODE_SYNTAX_ERROR),
770 errmsg("cannot specify NULL in BINARY mode")));
772 /* Set defaults for omitted options */
774 cstate->delim = cstate->csv_mode ? "," : "\t";
776 if (!cstate->null_print)
777 cstate->null_print = cstate->csv_mode ? "" : "\\N";
778 cstate->null_print_len = strlen(cstate->null_print);
780 if (cstate->csv_mode)
783 cstate->quote = "\"";
785 cstate->escape = cstate->quote;
788 /* Only single-character delimiter strings are supported. */
789 if (strlen(cstate->delim) != 1)
791 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
792 errmsg("COPY delimiter must be a single character")));
795 if (!cstate->csv_mode && cstate->header_line)
797 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
798 errmsg("COPY HEADER available only in CSV mode")));
801 if (!cstate->csv_mode && cstate->quote != NULL)
803 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
804 errmsg("COPY quote available only in CSV mode")));
806 if (cstate->csv_mode && strlen(cstate->quote) != 1)
808 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
809 errmsg("COPY quote must be a single character")));
812 if (!cstate->csv_mode && cstate->escape != NULL)
814 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
815 errmsg("COPY escape available only in CSV mode")));
817 if (cstate->csv_mode && strlen(cstate->escape) != 1)
819 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
820 errmsg("COPY escape must be a single character")));
822 /* Check force_quote */
823 if (!cstate->csv_mode && force_quote != NIL)
825 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
826 errmsg("COPY force quote available only in CSV mode")));
827 if (force_quote != NIL && is_from)
829 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
830 errmsg("COPY force quote only available using COPY TO")));
832 /* Check force_notnull */
833 if (!cstate->csv_mode && force_notnull != NIL)
835 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
836 errmsg("COPY force not null available only in CSV mode")));
837 if (force_notnull != NIL && !is_from)
839 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
840 errmsg("COPY force not null only available using COPY FROM")));
842 /* Don't allow the delimiter to appear in the null string. */
843 if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
845 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
846 errmsg("COPY delimiter must not appear in the NULL specification")));
848 /* Don't allow the CSV quote char to appear in the null string. */
849 if (cstate->csv_mode &&
850 strchr(cstate->null_print, cstate->quote[0]) != NULL)
852 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
853 errmsg("CSV quote character must not appear in the NULL specification")));
855 /* Open and lock the relation, using the appropriate lock type. */
856 cstate->rel = heap_openrv(relation,
857 (is_from ? RowExclusiveLock : AccessShareLock));
859 /* check read-only transaction */
860 if (XactReadOnly && !is_from &&
861 !isTempNamespace(RelationGetNamespace(cstate->rel)))
863 (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
864 errmsg("transaction is read-only")));
866 /* Check permissions. */
867 aclresult = pg_class_aclcheck(RelationGetRelid(cstate->rel), GetUserId(),
869 if (aclresult != ACLCHECK_OK)
870 aclcheck_error(aclresult, ACL_KIND_CLASS,
871 RelationGetRelationName(cstate->rel));
872 if (!pipe && !superuser())
874 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
875 errmsg("must be superuser to COPY to or from a file"),
876 errhint("Anyone can COPY to stdout or from stdin. "
877 "psql's \\copy command also works for anyone.")));
879 /* Don't allow COPY w/ OIDs to or from a table without them */
880 if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
882 (errcode(ERRCODE_UNDEFINED_COLUMN),
883 errmsg("table \"%s\" does not have OIDs",
884 RelationGetRelationName(cstate->rel))));
886 /* Generate or convert list of attributes to process */
887 cstate->attnumlist = CopyGetAttnums(cstate->rel, attnamelist);
889 /* Convert FORCE QUOTE name list to column numbers, check validity */
892 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
893 Form_pg_attribute *attr = tupDesc->attrs;
896 cstate->force_quote_atts = CopyGetAttnums(cstate->rel, force_quote);
898 foreach(cur, cstate->force_quote_atts)
900 int attnum = lfirst_int(cur);
902 if (!list_member_int(cstate->attnumlist, attnum))
904 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
905 errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
906 NameStr(attr[attnum - 1]->attname))));
910 /* Convert FORCE NOT NULL name list to column numbers, check validity */
913 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
914 Form_pg_attribute *attr = tupDesc->attrs;
917 cstate->force_notnull_atts = CopyGetAttnums(cstate->rel,
920 foreach(cur, cstate->force_notnull_atts)
922 int attnum = lfirst_int(cur);
924 if (!list_member_int(cstate->attnumlist, attnum))
926 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
927 errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
928 NameStr(attr[attnum - 1]->attname))));
932 /* Set up variables to avoid per-attribute overhead. */
933 initStringInfo(&cstate->attribute_buf);
934 initStringInfo(&cstate->line_buf);
935 cstate->line_buf_converted = false;
936 cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
937 cstate->raw_buf_index = cstate->raw_buf_len = 0;
939 /* Set up encoding conversion info */
940 cstate->client_encoding = pg_get_client_encoding();
941 cstate->need_transcoding = (cstate->client_encoding != GetDatabaseEncoding());
942 cstate->client_only_encoding = PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding);
944 cstate->copy_dest = COPY_FILE; /* default */
947 { /* copy from file to database */
948 if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
950 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
952 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
953 errmsg("cannot copy to view \"%s\"",
954 RelationGetRelationName(cstate->rel))));
955 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
957 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
958 errmsg("cannot copy to sequence \"%s\"",
959 RelationGetRelationName(cstate->rel))));
962 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
963 errmsg("cannot copy to non-table relation \"%s\"",
964 RelationGetRelationName(cstate->rel))));
968 if (whereToSendOutput == Remote)
969 ReceiveCopyBegin(cstate);
971 cstate->copy_file = stdin;
977 cstate->copy_file = AllocateFile(filename, PG_BINARY_R);
979 if (cstate->copy_file == NULL)
981 (errcode_for_file_access(),
982 errmsg("could not open file \"%s\" for reading: %m",
985 fstat(fileno(cstate->copy_file), &st);
986 if (S_ISDIR(st.st_mode))
988 FreeFile(cstate->copy_file);
990 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
991 errmsg("\"%s\" is a directory", filename)));
998 { /* copy from database to file */
999 if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
1001 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
1003 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1004 errmsg("cannot copy from view \"%s\"",
1005 RelationGetRelationName(cstate->rel))));
1006 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
1008 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1009 errmsg("cannot copy from sequence \"%s\"",
1010 RelationGetRelationName(cstate->rel))));
1013 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1014 errmsg("cannot copy from non-table relation \"%s\"",
1015 RelationGetRelationName(cstate->rel))));
1019 if (whereToSendOutput == Remote)
1020 cstate->fe_copy = true;
1022 cstate->copy_file = stdout;
1026 mode_t oumask; /* Pre-existing umask value */
1030 * Prevent write to relative path ... too easy to shoot
1031 * oneself in the foot by overwriting a database file ...
1033 if (!is_absolute_path(filename))
1035 (errcode(ERRCODE_INVALID_NAME),
1036 errmsg("relative path not allowed for COPY to file")));
1038 oumask = umask((mode_t) 022);
1039 cstate->copy_file = AllocateFile(filename, PG_BINARY_W);
1042 if (cstate->copy_file == NULL)
1044 (errcode_for_file_access(),
1045 errmsg("could not open file \"%s\" for writing: %m",
1048 fstat(fileno(cstate->copy_file), &st);
1049 if (S_ISDIR(st.st_mode))
1051 FreeFile(cstate->copy_file);
1053 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1054 errmsg("\"%s\" is a directory", filename)));
1063 /* we assume only the write case could fail here */
1064 if (FreeFile(cstate->copy_file))
1066 (errcode_for_file_access(),
1067 errmsg("could not write to file \"%s\": %m",
1072 * Close the relation. If reading, we can release the AccessShareLock
1073 * we got; if writing, we should hold the lock until end of
1074 * transaction to ensure that updates will be committed before lock is
1077 heap_close(cstate->rel, (is_from ? NoLock : AccessShareLock));
1079 /* Clean up storage (probably not really necessary) */
1080 pfree(cstate->attribute_buf.data);
1081 pfree(cstate->line_buf.data);
1082 pfree(cstate->raw_buf);
1088 * This intermediate routine just exists to localize the effects of setjmp
1089 * so we don't need to plaster a lot of variables with "volatile".
/*
 * DoCopyTo: wrapper that brackets a COPY TO with the frontend start/stop
 * handshake.
 *
 * When cstate->fe_copy is set, SendCopyBegin is called before, and
 * SendCopyEnd after, the data transfer.  pq_endcopyout(true) belongs to the
 * error path described by the comment below.
 *
 * NOTE(review): the call that performs the transfer and the error-trapping
 * construct around it are not visible in this excerpt.
 */
1092 DoCopyTo(CopyState cstate)
1096 if (cstate->fe_copy)
1097 SendCopyBegin(cstate);
1101 if (cstate->fe_copy)
1102 SendCopyEnd(cstate);
1107 * Make sure we turn off old-style COPY OUT mode upon error. It is
1108 * okay to do this in all cases, since it does nothing if the mode
1111 pq_endcopyout(true);
1118 * Copy from relation TO file.
/*
 * CopyTo: write the rows of cstate->rel to the COPY destination.
 *
 * Outline of the visible steps:
 *	1. For every column in cstate->attnumlist, look up its output function
 *	   (binary or text variant) and note whether FORCE QUOTE applies.  Both
 *	   arrays are sized for all physical columns and indexed by attnum - 1.
 *	2. Create a private memory context that is reset once per row.
 *	3. Binary format: send the 11-byte BinarySignature followed by two
 *	   int32 header words.  Text format: convert the NULL marker to the
 *	   client encoding if transcoding is needed, and optionally emit a
 *	   header line of column names via CopyAttributeOutCSV.
 *	4. Scan the relation with heap_beginscan() under ActiveSnapshot.  For
 *	   each tuple, emit the per-tuple header or OID where applicable, then
 *	   each requested attribute: delimiter, NULL marker or -1 length word,
 *	   and the text or binary value.  Finish with CopySendEndOfRow().
 *	5. End the scan, send the binary trailer (int16 -1), and release the
 *	   memory context and the output-function array.
 *
 * NOTE(review): several guards (binary/oids/isnull tests) and local
 * declarations are not visible in this excerpt.
 */
1121 CopyTo(CopyState cstate)
1125 HeapScanDesc scandesc;
1128 Form_pg_attribute *attr;
1129 FmgrInfo *out_functions;
1132 char *null_print_client;
1134 MemoryContext oldcontext;
1135 MemoryContext mycontext;
1137 tupDesc = cstate->rel->rd_att;
1138 attr = tupDesc->attrs;
1139 num_phys_attrs = tupDesc->natts;
1140 attr_count = list_length(cstate->attnumlist);
1141 null_print_client = cstate->null_print; /* default */
1143 /* Get info about the columns we need to process. */
1144 out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1145 force_quote = (bool *) palloc(num_phys_attrs * sizeof(bool));
1146 foreach(cur, cstate->attnumlist)
1148 int attnum = lfirst_int(cur);
1153 getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
1157 getTypeOutputInfo(attr[attnum - 1]->atttypid,
1160 fmgr_info(out_func_oid, &out_functions[attnum - 1]);
1162 if (list_member_int(cstate->force_quote_atts, attnum))
1163 force_quote[attnum - 1] = true;
1165 force_quote[attnum - 1] = false;
1169 * Create a temporary memory context that we can reset once per row to
1170 * recover palloc'd memory. This avoids any problems with leaks
1171 * inside datatype output routines, and should be faster than retail
1172 * pfree's anyway. (We don't need a whole econtext as CopyFrom does.)
1174 mycontext = AllocSetContextCreate(CurrentMemoryContext,
1176 ALLOCSET_DEFAULT_MINSIZE,
1177 ALLOCSET_DEFAULT_INITSIZE,
1178 ALLOCSET_DEFAULT_MAXSIZE);
1182 /* Generate header for a binary copy */
1186 CopySendData(cstate, (char *) BinarySignature, 11);
1191 CopySendInt32(cstate, tmp);
1192 /* No header extension */
1194 CopySendInt32(cstate, tmp);
1199 * For non-binary copy, we need to convert null_print to client
1200 * encoding, because it will be sent directly with CopySendString.
1202 if (cstate->need_transcoding)
1203 null_print_client = pg_server_to_client(cstate->null_print,
1204 cstate->null_print_len);
1206 /* if a header has been requested send the line */
1207 if (cstate->header_line)
1209 bool hdr_delim = false;
1211 foreach(cur, cstate->attnumlist)
1213 int attnum = lfirst_int(cur);
1217 CopySendChar(cstate, cstate->delim[0]);
1220 colname = NameStr(attr[attnum - 1]->attname);
1222 CopyAttributeOutCSV(cstate, colname, false);
1225 CopySendEndOfRow(cstate);
/* Main loop: one iteration per heap tuple returned by the scan. */
1229 scandesc = heap_beginscan(cstate->rel, ActiveSnapshot, 0, NULL);
1231 while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
1233 bool need_delim = false;
1235 CHECK_FOR_INTERRUPTS();
/* Discard the previous row's allocations, then work in the per-row context. */
1237 MemoryContextReset(mycontext);
1238 oldcontext = MemoryContextSwitchTo(mycontext);
1242 /* Binary per-tuple header */
1243 CopySendInt16(cstate, attr_count);
1244 /* Send OID if wanted --- note attr_count doesn't include it */
1247 Oid oid = HeapTupleGetOid(tuple);
1249 /* Hack --- assume Oid is same size as int32 */
1250 CopySendInt32(cstate, sizeof(int32));
1251 CopySendInt32(cstate, oid);
1256 /* Text format has no per-tuple header, but send OID if wanted */
1257 /* Assume digits don't need any quoting or encoding conversion */
1260 string = DatumGetCString(DirectFunctionCall1(oidout,
1261 ObjectIdGetDatum(HeapTupleGetOid(tuple))));
1262 CopySendString(cstate, string);
1267 foreach(cur, cstate->attnumlist)
1269 int attnum = lfirst_int(cur);
1273 value = heap_getattr(tuple, attnum, tupDesc, &isnull);
1275 if (!cstate->binary)
1278 CopySendChar(cstate, cstate->delim[0]);
1284 if (!cstate->binary)
1285 CopySendString(cstate, null_print_client);
1287 CopySendInt32(cstate, -1);
1291 if (!cstate->binary)
1293 string = DatumGetCString(FunctionCall1(&out_functions[attnum - 1],
1295 if (cstate->csv_mode)
1296 CopyAttributeOutCSV(cstate, string,
1297 force_quote[attnum - 1]);
1299 CopyAttributeOutText(cstate, string);
1305 outputbytes = DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1],
1307 /* We assume the result will not have been toasted */
1308 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
1309 CopySendData(cstate, VARDATA(outputbytes),
1310 VARSIZE(outputbytes) - VARHDRSZ);
1315 CopySendEndOfRow(cstate);
1317 MemoryContextSwitchTo(oldcontext);
1320 heap_endscan(scandesc);
1324 /* Generate trailer for a binary copy */
1325 CopySendInt16(cstate, -1);
/* Release the per-row context and the output-function lookup array. */
1328 MemoryContextDelete(mycontext);
1330 pfree(out_functions);
/*
 * copy_in_error_callback -- review documentation added to this listing;
 * every numbered line below is unchanged.
 *
 * arg is cast to the CopyState of the COPY in progress.  Each visible
 * branch emits exactly one errcontext() line that starts with
 * "COPY <cur_relname>, line <cur_lineno>" and then adds as much detail as
 * the state allows:
 *   - ", column <cur_attname>" when a column name is set but the data
 *     cannot usefully be displayed;
 *   - the column name plus the offending value when both cur_attname and
 *     cur_attval are set;
 *   - the whole input line when line_buf has already been converted
 *     (line_buf_converted) or no transcoding is needed;
 *   - nothing more when the line buffer is still unconverted.
 * Column values and line text are passed through limit_printout_length()
 * before being reported.
 *
 * NOTE(review): the condition selecting the first group of branches, and
 * the brace/else lines, are not visible in this listing; presumably that
 * group handles binary-format input -- confirm against the full source.
 */
1336 * error context callback for COPY FROM
1339 copy_in_error_callback(void *arg)
1341 CopyState cstate = (CopyState) arg;
/* Data not displayable: report relation, line number and, if set, column. */
1345 /* can't usefully display the data */
1346 if (cstate->cur_attname)
1347 errcontext("COPY %s, line %d, column %s",
1348 cstate->cur_relname, cstate->cur_lineno,
1349 cstate->cur_attname);
1351 errcontext("COPY %s, line %d",
1352 cstate->cur_relname, cstate->cur_lineno);
/* Data displayable: prefer the single column value over the whole line. */
1356 if (cstate->cur_attname && cstate->cur_attval)
1358 /* error is relevant to a particular column */
1361 attval = limit_printout_length(cstate->cur_attval);
1362 errcontext("COPY %s, line %d, column %s: \"%s\"",
1363 cstate->cur_relname, cstate->cur_lineno,
1364 cstate->cur_attname, attval);
1369 /* error is relevant to a particular line */
1370 if (cstate->line_buf_converted || !cstate->need_transcoding)
1374 lineval = limit_printout_length(cstate->line_buf.data);
1375 errcontext("COPY %s, line %d: \"%s\"",
1376 cstate->cur_relname, cstate->cur_lineno, lineval);
1382 * Here, the line buffer is still in a foreign encoding,
1383 * and indeed it's quite likely that the error is precisely
1384 * a failure to do encoding conversion (ie, bad data). We
1385 * dare not try to convert it, and at present there's no way
1386 * to regurgitate it without conversion. So we have to punt
1387 * and just report the line number.
1389 errcontext("COPY %s, line %d",
1390 cstate->cur_relname, cstate->cur_lineno);
/*
 * limit_printout_length -- review documentation added to this listing;
 * every numbered line below is unchanged.
 *
 * str: NUL-terminated input (its length is measured with strlen).
 *
 * Inputs of at most MAX_COPY_DATA_DISPLAY (100) bytes are returned as a
 * plain pstrdup() copy.  Longer inputs are clipped to len bytes, where len
 * comes from pg_mbcliplen() (the encoding-dependent truncation mentioned
 * below), copied into a fresh palloc() buffer and suffixed with "...".
 *
 * NOTE(review): the return type and the final return of the truncated
 * buffer are not visible in this listing; presumably res is returned --
 * confirm against the full source.
 */
1397 * Make sure we don't print an unreasonable amount of COPY data in a message.
1399 * It would seem a lot easier to just use the sprintf "precision" limit to
1400 * truncate the string. However, some versions of glibc have a bug/misfeature
1401 * that vsnprintf will always fail (return -1) if it is asked to truncate
1402 * a string that contains invalid byte sequences for the current encoding.
1403 * So, do our own truncation. We return a pstrdup'd copy of the input.
1406 limit_printout_length(const char *str)
1408 #define MAX_COPY_DATA_DISPLAY 100
1410 int slen = strlen(str);
1414 /* Fast path if definitely okay */
1415 if (slen <= MAX_COPY_DATA_DISPLAY)
1416 return pstrdup(str);
1418 /* Apply encoding-dependent truncation */
1419 len = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY);
1422 * Truncate, and add "..." to show we truncated the input.
/* Buffer size: len data bytes + 3 bytes for "..." + 1 terminating NUL. */
1424 res = (char *) palloc(len + 4);
1425 memcpy(res, str, len);
1426 strcpy(res + len, "...");
/*
 * CopyFrom -- review documentation added to this listing; every numbered
 * line below is unchanged.
 *
 * cstate: the COPY state.  The visible code reads rel, attnumlist, binary,
 * csv_mode, oids, header_line, null_print, force_notnull_atts and
 * line_buf, and maintains fe_eof, eol_type and the cur_relname /
 * cur_lineno / cur_attname / cur_attval fields that
 * copy_in_error_callback reports.
 *
 * Visible phases, in order:
 *  1. Build a ResultRelInfo and attach it to the EState, open the indexes
 *     and make a single-tuple slot, so the executor's trigger, constraint
 *     and index-insertion routines can be used.
 *  2. Allocate the per-attribute work arrays.  in_functions, typioparams,
 *     constraintexprs and force_notnull are indexed by attnum - 1;
 *     defexprs/defmap are filled densely (indexed by num_defaults), with
 *     defmap holding the zero-based column each default belongs to.
 *     constraintexprs is zero-filled so unused entries read as NULL.
 *  3. For every non-dropped attribute: look up the text or binary input
 *     function, record the force_notnull flag, prepare a default
 *     expression for columns absent from attnumlist, and prepare a
 *     domain-constraint expression for domain-typed columns.
 *  4. Begin AFTER-trigger bookkeeping and fire BEFORE STATEMENT triggers.
 *  5. For binary input, validate the 11-byte signature and the flags word
 *     and skip any header extension.
 *  6. Install copy_in_error_callback on error_context_stack; if a header
 *     line was requested, read and discard it.
 *  7. Per row: reset the per-tuple context, default every column to NULL,
 *     read the fields (text/CSV or binary), evaluate defaults, apply
 *     domain constraints, form the tuple, run BEFORE ROW triggers, check
 *     table constraints, insert into heap and indexes, run AFTER ROW
 *     triggers.
 *  8. Restore the error context stack and memory context, fire AFTER
 *     STATEMENT triggers, and release the work arrays, slot, indexes and
 *     executor state.
 *
 * NOTE(review): this listing omits many short lines (braces, else,
 * ereport openers, several declarations and conditions), so the exact
 * nesting and loop conditions must be confirmed against the full source.
 */
1432 * Copy FROM file to relation.
1435 CopyFrom(CopyState cstate)
1439 Form_pg_attribute *attr;
1440 AttrNumber num_phys_attrs,
1443 FmgrInfo *in_functions;
1444 FmgrInfo oid_in_function;
1447 ExprState **constraintexprs;
1448 bool *force_notnull;
1449 bool hasConstraints = false;
1456 char **field_strings;
1459 ResultRelInfo *resultRelInfo;
1460 EState *estate = CreateExecutorState(); /* for ExecConstraints() */
1461 TupleTableSlot *slot;
1464 ExprState **defexprs; /* array of default att expressions */
1465 ExprContext *econtext; /* used for ExecEvalExpr for default atts */
1466 MemoryContext oldcontext = CurrentMemoryContext;
1467 ErrorContextCallback errcontext;
1469 tupDesc = RelationGetDescr(cstate->rel);
1470 attr = tupDesc->attrs;
1471 num_phys_attrs = tupDesc->natts;
1472 attr_count = list_length(cstate->attnumlist);
1476 * We need a ResultRelInfo so we can use the regular executor's
1477 * index-entry-making machinery. (There used to be a huge amount of
1478 * code here that basically duplicated execUtils.c ...)
1480 resultRelInfo = makeNode(ResultRelInfo);
1481 resultRelInfo->ri_RangeTableIndex = 1; /* dummy */
1482 resultRelInfo->ri_RelationDesc = cstate->rel;
1483 resultRelInfo->ri_TrigDesc = CopyTriggerDesc(cstate->rel->trigdesc);
1484 if (resultRelInfo->ri_TrigDesc)
1485 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
1486 palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
1487 resultRelInfo->ri_TrigInstrument = NULL;
1489 ExecOpenIndices(resultRelInfo);
1491 estate->es_result_relations = resultRelInfo;
1492 estate->es_num_result_relations = 1;
1493 estate->es_result_relation_info = resultRelInfo;
1495 /* Set up a tuple slot too */
1496 slot = MakeSingleTupleTableSlot(tupDesc);
1498 econtext = GetPerTupleExprContext(estate);
1501 * Pick up the required catalog information for each attribute in the
1502 * relation, including the input function, the element type (to pass
1503 * to the input function), and info about defaults and constraints.
1504 * (Which input function we use depends on text/binary format choice.)
1506 in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1507 typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
1508 defmap = (int *) palloc(num_phys_attrs * sizeof(int));
1509 defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
1510 constraintexprs = (ExprState **) palloc0(num_phys_attrs * sizeof(ExprState *));
1511 force_notnull = (bool *) palloc(num_phys_attrs * sizeof(bool));
1513 for (attnum = 1; attnum <= num_phys_attrs; attnum++)
1515 /* We don't need info for dropped attributes */
1516 if (attr[attnum - 1]->attisdropped)
1519 /* Fetch the input function and typioparam info */
1521 getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
1522 &in_func_oid, &typioparams[attnum - 1]);
1524 getTypeInputInfo(attr[attnum - 1]->atttypid,
1525 &in_func_oid, &typioparams[attnum - 1]);
1526 fmgr_info(in_func_oid, &in_functions[attnum - 1]);
1528 if (list_member_int(cstate->force_notnull_atts, attnum))
1529 force_notnull[attnum - 1] = true;
1531 force_notnull[attnum - 1] = false;
1533 /* Get default info if needed */
1534 if (!list_member_int(cstate->attnumlist, attnum))
1536 /* attribute is NOT to be copied from input */
1537 /* use default value if one exists */
1538 Node *defexpr = build_column_default(cstate->rel, attnum);
1540 if (defexpr != NULL)
1542 defexprs[num_defaults] = ExecPrepareExpr((Expr *) defexpr,
1544 defmap[num_defaults] = attnum - 1;
1549 /* If it's a domain type, set up to check domain constraints */
1550 if (get_typtype(attr[attnum - 1]->atttypid) == 'd')
1556 * Easiest way to do this is to use parse_coerce.c to set up
1557 * an expression that checks the constraints. (At present,
1558 * the expression might contain a length-coercion-function
1559 * call and/or CoerceToDomain nodes.) The bottom of the
1560 * expression is a Param node so that we can fill in the
1561 * actual datum during the data input loop.
1563 prm = makeNode(Param);
1564 prm->paramkind = PARAM_EXEC;
1566 prm->paramtype = getBaseType(attr[attnum - 1]->atttypid);
1568 node = coerce_to_domain((Node *) prm,
1570 attr[attnum - 1]->atttypid,
1571 COERCE_IMPLICIT_CAST, false, false);
1573 constraintexprs[attnum - 1] = ExecPrepareExpr((Expr *) node,
1575 hasConstraints = true;
1579 /* Prepare to catch AFTER triggers. */
1580 AfterTriggerBeginQuery();
1583 * Check BEFORE STATEMENT insertion triggers. It's debateable whether
1584 * we should do this for COPY, since it's not really an "INSERT"
1585 * statement as such. However, executing these triggers maintains
1586 * consistency with the EACH ROW triggers that we already fire on
1589 ExecBSInsertTriggers(estate, resultRelInfo);
1591 if (!cstate->binary)
1592 file_has_oids = cstate->oids; /* must rely on user to tell us... */
1595 /* Read and verify binary header */
1600 if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
1601 memcmp(readSig, BinarySignature, 11) != 0)
1603 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1604 errmsg("COPY file signature not recognized")));
1606 if (!CopyGetInt32(cstate, &tmp))
1608 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1609 errmsg("invalid COPY file header (missing flags)")));
/*
 * Bit 16 of the flags word says whether the file carries OIDs.
 * NOTE(review): presumably that bit is cleared from tmp on a line missing
 * from this listing before the critical-flags test below -- confirm.
 */
1610 file_has_oids = (tmp & (1 << 16)) != 0;
1612 if ((tmp >> 16) != 0)
1614 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1615 errmsg("unrecognized critical flags in COPY file header")));
1616 /* Header extension length */
1617 if (!CopyGetInt32(cstate, &tmp) ||
1620 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1621 errmsg("invalid COPY file header (missing length)")));
1622 /* Skip extension header, if present */
1625 if (CopyGetData(cstate, readSig, 1, 1) != 1)
1627 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1628 errmsg("invalid COPY file header (wrong length)")));
1632 if (file_has_oids && cstate->binary)
1634 getTypeBinaryInputInfo(OIDOID,
1635 &in_func_oid, &oid_typioparam);
1636 fmgr_info(in_func_oid, &oid_in_function);
1639 values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1640 nulls = (char *) palloc(num_phys_attrs * sizeof(char));
1642 /* create workspace for CopyReadAttributes results */
/* One extra field slot is reserved when the input carries an OID column. */
1643 nfields = file_has_oids ? (attr_count + 1) : attr_count;
1644 field_strings = (char **) palloc(nfields * sizeof(char *));
1646 /* Make room for a PARAM_EXEC value for domain constraint checks */
1648 econtext->ecxt_param_exec_vals = (ParamExecData *)
1649 palloc0(sizeof(ParamExecData));
1651 /* Initialize state variables */
1652 cstate->fe_eof = false;
1653 cstate->eol_type = EOL_UNKNOWN;
1654 cstate->cur_relname = RelationGetRelationName(cstate->rel);
1655 cstate->cur_lineno = 0;
1656 cstate->cur_attname = NULL;
1657 cstate->cur_attval = NULL;
1659 /* Set up callback to identify error line number */
1660 errcontext.callback = copy_in_error_callback;
1661 errcontext.arg = (void *) cstate;
1662 errcontext.previous = error_context_stack;
1663 error_context_stack = &errcontext;
1665 /* on input just throw the header line away */
1666 if (cstate->header_line)
1668 cstate->cur_lineno++;
1669 done = CopyReadLine(cstate);
1675 Oid loaded_oid = InvalidOid;
1677 CHECK_FOR_INTERRUPTS();
1679 cstate->cur_lineno++;
1681 /* Reset the per-tuple exprcontext */
1682 ResetPerTupleExprContext(estate);
1684 /* Switch into its memory context */
1685 MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
/* nulls[] holds 'n' for a NULL column and ' ' for a non-NULL one. */
1687 /* Initialize all values for row to NULL */
1688 MemSet(values, 0, num_phys_attrs * sizeof(Datum));
1689 MemSet(nulls, 'n', num_phys_attrs * sizeof(char));
1691 if (!cstate->binary)
1698 /* Actually read the line into memory here */
1699 done = CopyReadLine(cstate);
1702 * EOF at start of line means we're done. If we see EOF after
1703 * some characters, we act as though it was newline followed
1704 * by EOF, ie, process the line and then exit loop on next
1707 if (done && cstate->line_buf.len == 0)
1710 /* Parse the line into de-escaped field values */
1711 if (cstate->csv_mode)
1712 fldct = CopyReadAttributesCSV(cstate, nfields, field_strings);
1714 fldct = CopyReadAttributesText(cstate, nfields, field_strings);
1717 /* Read the OID field if present */
1720 if (fieldno >= fldct)
1722 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1723 errmsg("missing data for OID column")));
1724 string = field_strings[fieldno++];
1728 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1729 errmsg("null OID in COPY data")));
1732 cstate->cur_attname = "oid";
1733 cstate->cur_attval = string;
1734 loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
1735 CStringGetDatum(string)));
1736 if (loaded_oid == InvalidOid)
1738 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1739 errmsg("invalid OID in COPY data")));
1740 cstate->cur_attname = NULL;
1741 cstate->cur_attval = NULL;
1745 /* Loop to read the user attributes on the line. */
1746 foreach(cur, cstate->attnumlist)
1748 int attnum = lfirst_int(cur);
1751 if (fieldno >= fldct)
1753 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1754 errmsg("missing data for column \"%s\"",
1755 NameStr(attr[m]->attname))));
1756 string = field_strings[fieldno++];
1758 if (cstate->csv_mode && string == NULL && force_notnull[m])
1760 /* Go ahead and read the NULL string */
1761 string = cstate->null_print;
1764 /* If we read an SQL NULL, no need to do anything */
1767 cstate->cur_attname = NameStr(attr[m]->attname);
1768 cstate->cur_attval = string;
1769 values[m] = FunctionCall3(&in_functions[m],
1770 CStringGetDatum(string),
1771 ObjectIdGetDatum(typioparams[m]),
1772 Int32GetDatum(attr[m]->atttypmod));
1774 cstate->cur_attname = NULL;
1775 cstate->cur_attval = NULL;
1779 Assert(fieldno == nfields);
1787 if (!CopyGetInt16(cstate, &fld_count) ||
1794 if (fld_count != attr_count)
1796 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1797 errmsg("row field count is %d, expected %d",
1798 (int) fld_count, attr_count)));
1802 cstate->cur_attname = "oid";
1804 DatumGetObjectId(CopyReadBinaryAttribute(cstate,
1810 if (isnull || loaded_oid == InvalidOid)
1812 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1813 errmsg("invalid OID in COPY data")));
1814 cstate->cur_attname = NULL;
1818 foreach(cur, cstate->attnumlist)
1820 int attnum = lfirst_int(cur);
1823 cstate->cur_attname = NameStr(attr[m]->attname);
1825 values[m] = CopyReadBinaryAttribute(cstate,
1831 nulls[m] = isnull ? 'n' : ' ';
1832 cstate->cur_attname = NULL;
1837 * Now compute and insert any defaults available for the columns
1838 * not provided by the input data. Anything not processed here or
1839 * above will remain NULL.
1841 for (i = 0; i < num_defaults; i++)
1843 values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
1846 nulls[defmap[i]] = ' ';
1849 /* Next apply any domain constraints */
1852 ParamExecData *prmdata = &econtext->ecxt_param_exec_vals[0];
1854 for (i = 0; i < num_phys_attrs; i++)
1856 ExprState *exprstate = constraintexprs[i];
1858 if (exprstate == NULL)
1859 continue; /* no constraint for this attr */
1861 /* Insert current row's value into the Param value */
1862 prmdata->value = values[i];
1863 prmdata->isnull = (nulls[i] == 'n');
1866 * Execute the constraint expression. Allow the
1867 * expression to replace the value (consider e.g. a
1868 * timestamp precision restriction).
1870 values[i] = ExecEvalExpr(exprstate, econtext,
1872 nulls[i] = isnull ? 'n' : ' ';
1876 /* And now we can form the input tuple. */
1877 tuple = heap_formtuple(tupDesc, values, nulls);
1879 if (cstate->oids && file_has_oids)
1880 HeapTupleSetOid(tuple, loaded_oid);
1882 /* Triggers and stuff need to be invoked in query context. */
1883 MemoryContextSwitchTo(oldcontext);
1887 /* BEFORE ROW INSERT Triggers */
1888 if (resultRelInfo->ri_TrigDesc &&
1889 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
1893 newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1895 if (newtuple == NULL) /* "do nothing" */
1897 else if (newtuple != tuple) /* modified by Trigger(s) */
1899 heap_freetuple(tuple);
1906 /* Place tuple in tuple slot */
1907 ExecStoreTuple(tuple, slot, InvalidBuffer, false);
1909 /* Check the constraints of the tuple */
1910 if (cstate->rel->rd_att->constr)
1911 ExecConstraints(resultRelInfo, slot, estate);
1913 /* OK, store the tuple and create index entries for it */
1914 simple_heap_insert(cstate->rel, tuple);
1916 if (resultRelInfo->ri_NumIndices > 0)
1917 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1919 /* AFTER ROW INSERT Triggers */
1920 ExecARInsertTriggers(estate, resultRelInfo, tuple);
1924 /* Done, clean up */
1925 error_context_stack = errcontext.previous;
1927 MemoryContextSwitchTo(oldcontext);
1929 /* Execute AFTER STATEMENT insertion triggers */
1930 ExecASInsertTriggers(estate, resultRelInfo);
1932 /* Handle queued AFTER triggers */
1933 AfterTriggerEndQuery(estate);
1937 pfree(field_strings);
1939 pfree(in_functions);
1943 pfree(constraintexprs);
1944 pfree(force_notnull);
1946 ExecDropSingleTupleTableSlot(slot);
1948 ExecCloseIndices(resultRelInfo);
1950 FreeExecutorState(estate);
/*
 * CopyReadLine -- review documentation added to this listing; every
 * numbered line below is unchanged.
 *
 * Visible steps:
 *  1. Empty cstate->line_buf and clear line_buf_converted, so that error
 *     reporting does not show unconverted data.
 *  2. Delegate the scan to CopyReadLineCSV() or CopyReadLineText()
 *     depending on cstate->csv_mode.
 *  3. On EOF with a COPY_NEW_FE destination, discard the remaining raw
 *     input by repeatedly calling CopyLoadRawBuf().
 *  4. Otherwise strip the end-of-line marker from line_buf according to
 *     cstate->eol_type: one byte for the "\n" and "\r" styles, two bytes
 *     for "\r\n" (the Asserts document the expected trailing bytes).
 *  5. If need_transcoding, convert the line with pg_client_to_server();
 *     when a new string comes back, replace line_buf's contents with it.
 *  6. Set line_buf_converted so the buffer may be used in error messages.
 *
 * NOTE(review): the switch case labels, the EOF test and the return
 * statement are not visible in this listing -- confirm against the full
 * source.
 */
1955 * Read the next input line and stash it in line_buf, with conversion to
1958 * Result is true if read was terminated by EOF, false if terminated
1959 * by newline. The terminating newline or EOF marker is not included
1960 * in the final value of line_buf.
1963 CopyReadLine(CopyState cstate)
1967 /* Reset line_buf to empty */
1968 cstate->line_buf.len = 0;
1969 cstate->line_buf.data[0] = '\0';
1971 /* Mark that encoding conversion hasn't occurred yet */
1972 cstate->line_buf_converted = false;
1974 /* Parse data and transfer into line_buf */
1975 if (cstate->csv_mode)
1976 result = CopyReadLineCSV(cstate);
1978 result = CopyReadLineText(cstate);
1983 * Reached EOF. In protocol version 3, we should ignore anything
1984 * after \. up to the protocol end of copy data. (XXX maybe
1985 * better not to treat \. as special?)
1987 if (cstate->copy_dest == COPY_NEW_FE)
1990 cstate->raw_buf_index = cstate->raw_buf_len;
1991 } while (CopyLoadRawBuf(cstate));
1997 * If we didn't hit EOF, then we must have transferred the EOL marker
1998 * to line_buf along with the data. Get rid of it.
2000 switch (cstate->eol_type)
2003 Assert(cstate->line_buf.len >= 1);
2004 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2005 cstate->line_buf.len--;
2006 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2009 Assert(cstate->line_buf.len >= 1);
2010 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
2011 cstate->line_buf.len--;
2012 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2015 Assert(cstate->line_buf.len >= 2);
2016 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
2017 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2018 cstate->line_buf.len -= 2;
2019 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2022 /* shouldn't get here */
2028 /* Done reading the line. Convert it to server encoding. */
2029 if (cstate->need_transcoding)
2033 cvt = pg_client_to_server(cstate->line_buf.data,
2034 cstate->line_buf.len);
2035 if (cvt != cstate->line_buf.data)
2037 /* transfer converted data back to line_buf */
2038 cstate->line_buf.len = 0;
2039 cstate->line_buf.data[0] = '\0';
2040 appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
2045 /* Now it's safe to use the buffer in error messages */
2046 cstate->line_buf_converted = true;
/*
 * CopyReadLineText -- review documentation added to this listing; every
 * numbered line below is unchanged.
 *
 * Scans cstate->raw_buf for the end of the next text-format line and moves
 * the line into cstate->line_buf.  Working state visible here:
 *   copy_raw_buf, copy_buf_len   local copies of raw_buf and raw_buf_len
 *   raw_buf_ptr    index of the next byte to examine
 *   prev_raw_ptr   index of the byte just fetched; assigning it back to
 *                  raw_buf_ptr ("undo fetch") lets the scan retry that
 *                  byte once more input has been loaded
 *   need_data      forces a reload at the top of the loop
 *   hit_eof        no more input is available
 * Bytes from cstate->raw_buf_index up to raw_buf_ptr are accepted but not
 * yet copied; they are flushed with appendBinaryStringInfo() before each
 * reload and once more after the loop.
 *
 * Errors raised with ERRCODE_BAD_COPY_FILE_FORMAT: a literal carriage
 * return or newline that contradicts the established eol_type, and an
 * end-of-copy marker that is corrupt or uses a different newline style.
 * When the end-of-copy marker is accepted, only the data before it is
 * appended to line_buf, raw_buf_index is moved past the marker, and
 * result is set to true (EOF).
 *
 * With a client_only_encoding, the scan skips the trailing bytes of each
 * multibyte character (length from pg_encoding_mblen) so they are never
 * mistaken for the special characters.
 *
 * NOTE(review): the loop header, the character tests, break/continue and
 * the return statement are not visible in this listing -- confirm against
 * the full source.
 */
2052 * CopyReadLineText - inner loop of CopyReadLine for non-CSV mode
2054 * If you need to change this, better look at CopyReadLineCSV too
2057 CopyReadLineText(CopyState cstate)
2069 /* set default status */
2073 * The objective of this loop is to transfer the entire next input
2074 * line into line_buf. Hence, we only care for detecting newlines
2075 * (\r and/or \n) and the end-of-copy marker (\.).
2077 * For backwards compatibility we allow backslashes to escape newline
2078 * characters. Backslashes other than the end marker get put into the
2079 * line_buf, since CopyReadAttributesText does its own escape processing.
2081 * These four characters, and only these four, are assumed the same in
2082 * frontend and backend encodings.
2084 * For speed, we try to move data to line_buf in chunks rather than
2085 * one character at a time. raw_buf_ptr points to the next character
2086 * to examine; any characters from raw_buf_index to raw_buf_ptr have
2087 * been determined to be part of the line, but not yet transferred
2090 * For a little extra speed within the loop, we copy raw_buf and
2091 * raw_buf_len into local variables.
2093 copy_raw_buf = cstate->raw_buf;
2094 raw_buf_ptr = cstate->raw_buf_index;
2095 copy_buf_len = cstate->raw_buf_len;
2096 need_data = false; /* flag to force reading more data */
2097 hit_eof = false; /* flag indicating no more data available */
2104 /* Load more data if needed */
2105 if (raw_buf_ptr >= copy_buf_len || need_data)
2108 * Transfer any approved data to line_buf; must do this to
2109 * be sure there is some room in raw_buf.
2111 if (raw_buf_ptr > cstate->raw_buf_index)
2113 appendBinaryStringInfo(&cstate->line_buf,
2114 cstate->raw_buf + cstate->raw_buf_index,
2115 raw_buf_ptr - cstate->raw_buf_index);
2116 cstate->raw_buf_index = raw_buf_ptr;
2119 * Try to read some more data. This will certainly reset
2120 * raw_buf_index to zero, and raw_buf_ptr must go with it.
2122 if (!CopyLoadRawBuf(cstate))
2125 copy_buf_len = cstate->raw_buf_len;
2127 * If we are completely out of data, break out of the loop,
2130 if (copy_buf_len <= 0)
/* Remember where this byte started so the fetch can be undone later. */
2138 /* OK to fetch a character */
2139 prev_raw_ptr = raw_buf_ptr;
2140 c = copy_raw_buf[raw_buf_ptr++];
2144 /* Check for \r\n on first line, _and_ handle \r\n. */
2145 if (cstate->eol_type == EOL_UNKNOWN ||
2146 cstate->eol_type == EOL_CRNL)
2149 * If need more data, go back to loop top to load it.
2151 * Note that if we are at EOF, c will wind up as '\0'
2152 * because of the guaranteed pad of raw_buf.
2154 if (raw_buf_ptr >= copy_buf_len && !hit_eof)
2156 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2160 c = copy_raw_buf[raw_buf_ptr];
2164 raw_buf_ptr++; /* eat newline */
2165 cstate->eol_type = EOL_CRNL; /* in case not set yet */
2169 /* found \r, but no \n */
2170 if (cstate->eol_type == EOL_CRNL)
2172 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2173 errmsg("literal carriage return found in data"),
2174 errhint("Use \"\\r\" to represent carriage return.")));
2176 * if we got here, it is the first line and we didn't
2177 * find \n, so don't consume the peeked character
2179 cstate->eol_type = EOL_CR;
2182 else if (cstate->eol_type == EOL_NL)
2184 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2185 errmsg("literal carriage return found in data"),
2186 errhint("Use \"\\r\" to represent carriage return.")));
2187 /* If reach here, we have found the line terminator */
2193 if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
2195 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2196 errmsg("literal newline found in data"),
2197 errhint("Use \"\\n\" to represent newline.")));
2198 cstate->eol_type = EOL_NL; /* in case not set yet */
2199 /* If reach here, we have found the line terminator */
2206 * If need more data, go back to loop top to load it.
2208 if (raw_buf_ptr >= copy_buf_len)
2212 /* backslash just before EOF, treat as data char */
2216 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2222 * In non-CSV mode, backslash quotes the following character
2223 * even if it's a newline, so we always advance to next character
2225 c = copy_raw_buf[raw_buf_ptr++];
2229 if (cstate->eol_type == EOL_CRNL)
2231 if (raw_buf_ptr >= copy_buf_len && !hit_eof)
2233 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2237 /* if hit_eof, c will become '\0' */
2238 c = copy_raw_buf[raw_buf_ptr++];
2241 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2242 errmsg("end-of-copy marker does not match previous newline style")));
2245 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2246 errmsg("end-of-copy marker corrupt")));
2248 if (raw_buf_ptr >= copy_buf_len && !hit_eof)
2250 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2254 /* if hit_eof, c will become '\0' */
2255 c = copy_raw_buf[raw_buf_ptr++];
2256 if (c != '\r' && c != '\n')
2258 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2259 errmsg("end-of-copy marker corrupt")));
2260 if ((cstate->eol_type == EOL_NL && c != '\n') ||
2261 (cstate->eol_type == EOL_CRNL && c != '\n') ||
2262 (cstate->eol_type == EOL_CR && c != '\r'))
2264 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2265 errmsg("end-of-copy marker does not match previous newline style")));
2268 * Transfer only the data before the \. into line_buf,
2269 * then discard the data and the \. sequence.
2271 if (prev_raw_ptr > cstate->raw_buf_index)
2272 appendBinaryStringInfo(&cstate->line_buf,
2273 cstate->raw_buf + cstate->raw_buf_index,
2274 prev_raw_ptr - cstate->raw_buf_index);
2275 cstate->raw_buf_index = raw_buf_ptr;
2276 result = true; /* report EOF */
2282 * Do we need to be careful about trailing bytes of multibyte
2283 * characters? (See note above about client_only_encoding)
2285 * We assume here that pg_encoding_mblen only looks at the first
2286 * byte of the character!
2288 if (cstate->client_only_encoding)
2293 mblen = pg_encoding_mblen(cstate->client_encoding, s);
2294 if (raw_buf_ptr + (mblen-1) > copy_buf_len)
2298 /* consume the partial character (conversion will fail) */
2299 raw_buf_ptr = copy_buf_len;
2303 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2307 raw_buf_ptr += mblen-1;
2309 } /* end of outer loop */
2312 * Transfer any still-uncopied data to line_buf.
2314 if (raw_buf_ptr > cstate->raw_buf_index)
2316 appendBinaryStringInfo(&cstate->line_buf,
2317 cstate->raw_buf + cstate->raw_buf_index,
2318 raw_buf_ptr - cstate->raw_buf_index);
2319 cstate->raw_buf_index = raw_buf_ptr;
/*
 * CopyReadLineCSV -- review documentation added to this listing; every
 * numbered line below is unchanged.
 *
 * CSV counterpart of CopyReadLineText: scans cstate->raw_buf for the end
 * of the next line and moves the line into cstate->line_buf, using the
 * same working state (copy_raw_buf, copy_buf_len, raw_buf_ptr,
 * prev_raw_ptr, need_data, hit_eof) and the same "undo fetch" technique of
 * restoring raw_buf_ptr from prev_raw_ptr before reloading input.
 *
 * CSV-specific state and rules visible here:
 *   quotec, escapec   first byte of cstate->quote and cstate->escape
 *   in_quote          toggled by each quote char not preceded by an active
 *                     escape; while set, \r and \n are ordinary data
 *   last_was_esc      toggled by an escape char seen inside quotes
 *   - cur_lineno is incremented for an end-of-line char found inside
 *     quotes ('\n' when eol_type is EOL_NL, otherwise '\r').
 *   - The end-of-copy marker is only considered when the backslash is the
 *     first byte of the line (line_buf.len == 0); it is examined through
 *     c2 so that c keeps the backslash.
 *   - Unquoted carriage returns or newlines that contradict the
 *     established eol_type, and corrupt or mismatched end-of-copy markers,
 *     raise ERRCODE_BAD_COPY_FILE_FORMAT.
 *
 * NOTE(review): the loop header, several character tests, break/continue
 * and the return statement are not visible in this listing -- confirm
 * against the full source.
 */
2326 * CopyReadLineCSV - inner loop of CopyReadLine for CSV mode
2328 * If you need to change this, better look at CopyReadLineText too
2331 CopyReadLineCSV(CopyState cstate)
2340 bool in_quote = false, last_was_esc = false;
2341 char quotec = cstate->quote[0];
2342 char escapec = cstate->escape[0];
2344 /* ignore special escape processing if it's the same as quotec */
2345 if (quotec == escapec)
2350 /* set default status */
2354 * The objective of this loop is to transfer the entire next input
2355 * line into line_buf. Hence, we only care for detecting newlines
2356 * (\r and/or \n) and the end-of-copy marker (\.).
2358 * In CSV mode, \r and \n inside a quoted field are just part of the
2359 * data value and are put in line_buf. We keep just enough state
2360 * to know if we are currently in a quoted field or not.
2362 * These four characters, and the CSV escape and quote characters,
2363 * are assumed the same in frontend and backend encodings.
2365 * For speed, we try to move data to line_buf in chunks rather than
2366 * one character at a time. raw_buf_ptr points to the next character
2367 * to examine; any characters from raw_buf_index to raw_buf_ptr have
2368 * been determined to be part of the line, but not yet transferred
2371 * For a little extra speed within the loop, we copy raw_buf and
2372 * raw_buf_len into local variables.
2374 copy_raw_buf = cstate->raw_buf;
2375 raw_buf_ptr = cstate->raw_buf_index;
2376 copy_buf_len = cstate->raw_buf_len;
2377 need_data = false; /* flag to force reading more data */
2378 hit_eof = false; /* flag indicating no more data available */
2385 /* Load more data if needed */
2386 if (raw_buf_ptr >= copy_buf_len || need_data)
2389 * Transfer any approved data to line_buf; must do this to
2390 * be sure there is some room in raw_buf.
2392 if (raw_buf_ptr > cstate->raw_buf_index)
2394 appendBinaryStringInfo(&cstate->line_buf,
2395 cstate->raw_buf + cstate->raw_buf_index,
2396 raw_buf_ptr - cstate->raw_buf_index);
2397 cstate->raw_buf_index = raw_buf_ptr;
2400 * Try to read some more data. This will certainly reset
2401 * raw_buf_index to zero, and raw_buf_ptr must go with it.
2403 if (!CopyLoadRawBuf(cstate))
2406 copy_buf_len = cstate->raw_buf_len;
2408 * If we are completely out of data, break out of the loop,
2411 if (copy_buf_len <= 0)
/* Remember where this byte started so the fetch can be undone later. */
2419 /* OK to fetch a character */
2420 prev_raw_ptr = raw_buf_ptr;
2421 c = copy_raw_buf[raw_buf_ptr++];
2424 * If character is '\\' or '\r', we may need to look ahead below.
2425 * Force fetch of the next character if we don't already have it.
2426 * We need to do this before changing CSV state, in case one of
2427 * these characters is also the quote or escape character.
2429 * Note: old-protocol does not like forced prefetch, but it's OK
2430 * here since we cannot validly be at EOF.
2432 if (c == '\\' || c == '\r')
2434 if (raw_buf_ptr >= copy_buf_len && !hit_eof)
2436 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2443 * Dealing with quotes and escapes here is mildly tricky. If the
2444 * quote char is also the escape char, there's no problem - we
2445 * just use the char as a toggle. If they are different, we need
2446 * to ensure that we only take account of an escape inside a quoted
2447 * field and immediately preceding a quote char, and not the
2448 * second in a escape-escape sequence.
2450 if (in_quote && c == escapec)
2451 last_was_esc = ! last_was_esc;
2452 if (c == quotec && ! last_was_esc)
2453 in_quote = ! in_quote;
2455 last_was_esc = false;
2458 * Updating the line count for embedded CR and/or LF chars is
2459 * necessarily a little fragile - this test is probably about
2460 * the best we can do. (XXX it's arguable whether we should
2461 * do this at all --- is cur_lineno a physical or logical count?)
2463 if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
2464 cstate->cur_lineno++;
2466 if (c == '\r' && !in_quote)
2468 /* Check for \r\n on first line, _and_ handle \r\n. */
2469 if (cstate->eol_type == EOL_UNKNOWN ||
2470 cstate->eol_type == EOL_CRNL)
2473 * If need more data, go back to loop top to load it.
2475 * Note that if we are at EOF, c will wind up as '\0'
2476 * because of the guaranteed pad of raw_buf.
2478 if (raw_buf_ptr >= copy_buf_len && !hit_eof)
2480 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2484 c = copy_raw_buf[raw_buf_ptr];
2488 raw_buf_ptr++; /* eat newline */
2489 cstate->eol_type = EOL_CRNL; /* in case not set yet */
2493 /* found \r, but no \n */
2494 if (cstate->eol_type == EOL_CRNL)
2496 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2497 errmsg("unquoted carriage return found in data"),
2498 errhint("Use quoted CSV field to represent carriage return.")));
2500 * if we got here, it is the first line and we didn't
2501 * find \n, so don't consume the peeked character
2503 cstate->eol_type = EOL_CR;
2506 else if (cstate->eol_type == EOL_NL)
2508 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2509 errmsg("unquoted carriage return found in CSV data"),
2510 errhint("Use quoted CSV field to represent carriage return.")));
2511 /* If reach here, we have found the line terminator */
2515 if (c == '\n' && !in_quote)
2517 if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
2519 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2520 errmsg("unquoted newline found in data"),
2521 errhint("Use quoted CSV field to represent newline.")));
2522 cstate->eol_type = EOL_NL; /* in case not set yet */
2523 /* If reach here, we have found the line terminator */
2528 * In CSV mode, we only recognize \. at start of line
2530 if (c == '\\' && cstate->line_buf.len == 0)
2535 * If need more data, go back to loop top to load it.
2537 if (raw_buf_ptr >= copy_buf_len)
2541 /* backslash just before EOF, treat as data char */
2545 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2551 * Note: we do not change c here since we aren't treating \
2552 * as escaping the next character.
2554 c2 = copy_raw_buf[raw_buf_ptr];
2558 raw_buf_ptr++; /* consume the '.' */
2561 * Note: if we loop back for more data here, it does not
2562 * matter that the CSV state change checks are re-executed;
2563 * we will come back here with no important state changed.
2565 if (cstate->eol_type == EOL_CRNL)
2567 if (raw_buf_ptr >= copy_buf_len && !hit_eof)
2569 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2573 /* if hit_eof, c2 will become '\0' */
2574 c2 = copy_raw_buf[raw_buf_ptr++];
2577 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2578 errmsg("end-of-copy marker does not match previous newline style")));
2581 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2582 errmsg("end-of-copy marker corrupt")));
2584 if (raw_buf_ptr >= copy_buf_len && !hit_eof)
2586 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2590 /* if hit_eof, c2 will become '\0' */
2591 c2 = copy_raw_buf[raw_buf_ptr++];
2592 if (c2 != '\r' && c2 != '\n')
2594 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2595 errmsg("end-of-copy marker corrupt")));
2596 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
2597 (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
2598 (cstate->eol_type == EOL_CR && c2 != '\r'))
2600 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2601 errmsg("end-of-copy marker does not match previous newline style")));
2604 * Transfer only the data before the \. into line_buf,
2605 * then discard the data and the \. sequence.
2607 if (prev_raw_ptr > cstate->raw_buf_index)
2608 appendBinaryStringInfo(&cstate->line_buf, cstate->raw_buf + cstate->raw_buf_index,
2609 prev_raw_ptr - cstate->raw_buf_index);
2610 cstate->raw_buf_index = raw_buf_ptr;
2611 result = true; /* report EOF */
2617 * Do we need to be careful about trailing bytes of multibyte
2618 * characters? (See note above about client_only_encoding)
2620 * We assume here that pg_encoding_mblen only looks at the first
2621 * byte of the character!
2623 if (cstate->client_only_encoding)
2628 mblen = pg_encoding_mblen(cstate->client_encoding, s);
2629 if (raw_buf_ptr + (mblen-1) > copy_buf_len)
2633 /* consume the partial character (will fail below) */
2634 raw_buf_ptr = copy_buf_len;
2638 raw_buf_ptr = prev_raw_ptr; /* undo fetch */
2642 raw_buf_ptr += mblen-1;
2644 } /* end of outer loop */
2647 * Transfer any still-uncopied data to line_buf.
2649 if (raw_buf_ptr > cstate->raw_buf_index)
2651 appendBinaryStringInfo(&cstate->line_buf,
2652 cstate->raw_buf + cstate->raw_buf_index,
2653 raw_buf_ptr - cstate->raw_buf_index);
2654 cstate->raw_buf_index = raw_buf_ptr;
2661 * Return decimal value for a hexadecimal digit
/*
 * GetDecimalFromHex: map one hexadecimal digit character to its value.
 *
 * hex is the character to convert.  The callers visible in this file
 * (in CopyReadAttributesText) only pass characters that have already
 * satisfied isxdigit().
 *
 * Decimal digits are singled out by the isdigit() test.  Any other input
 * is folded to lower case and measured as an offset from 'a', plus 10, so
 * both 'a' and 'A' yield 10.  The casts to unsigned char are what make
 * the <ctype.h> calls safe for char values that would otherwise be
 * negative.
 */
2664 GetDecimalFromHex(char hex)
2666 if (isdigit((unsigned char) hex))
/* Letter digit: case-insensitive offset from 'a', biased by 10. */
2669 return tolower((unsigned char) hex) - 'a' + 10;
2673 * Parse the current line into separate attributes (fields),
2674 * performing de-escaping as needed.
2676 * The input is in line_buf. We use attribute_buf to hold the result
2677 * strings. fieldvals[k] is set to point to the k'th attribute string,
2678 * or NULL when the input matches the null marker string. (Note that the
2679 * caller cannot check for nulls since the returned string would be the
2680 * post-de-escaping equivalent, which may look the same as some valid data
2683 * delim is the column delimiter string (must be just one byte for now).
2684 * null_print is the null marker string. Note that this is compared to
2685 * the pre-de-escaped input string.
2687 * The return value is the number of fields actually read. (We error out
2688 * if this would exceed maxfields, which is the length of fieldvals[].)
/*
 * Implementation notes for CopyReadAttributesText:
 *
 * - Only the first byte of cstate->delim is used as the column delimiter.
 * - Input is scanned from cstate->line_buf between cur_ptr and
 *   line_end_ptr.  Output is written into cstate->attribute_buf through
 *   output_ptr, and each fieldvals[] entry is pointed at the start of its
 *   field in that output area before the field is scanned.
 * - Every field is terminated with a NUL byte in the output area.
 * - A fieldvals[] entry is replaced by NULL when the raw input bytes of
 *   the field (start_ptr up to end_ptr) have the same length and content
 *   as cstate->null_print.
 * - Both the zero-column check and the fieldvals[] capacity check report
 *   ERRCODE_BAD_COPY_FILE_FORMAT with the same message text.
 * - Before returning, attribute_buf.len is set to the number of bytes
 *   written through output_ptr.
 */
2691 CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
2693 char delimc = cstate->delim[0];
2700 * We need a special case for zero-column tables: check that the input
2701 * line is empty, and return.
2705 if (cstate->line_buf.len != 0)
2707 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2708 errmsg("extra data after last expected column")));
2712 /* reset attribute_buf to empty */
2713 cstate->attribute_buf.len = 0;
2714 cstate->attribute_buf.data[0] = '\0';
2717 * The de-escaped attributes will certainly not be longer than the input
2718 * data line, so we can just force attribute_buf to be large enough and
2719 * then transfer data without any checks for enough space. We need to
2720 * do it this way because enlarging attribute_buf mid-stream would
2721 * invalidate pointers already stored into fieldvals[].
/* Grow up front whenever the buffer is not strictly larger than the line. */
2723 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
2724 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
2725 output_ptr = cstate->attribute_buf.data;
2727 /* set pointer variables for loop */
2728 cur_ptr = cstate->line_buf.data;
2729 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
2731 /* Outer loop iterates over fields */
2735 bool found_delim = false;
2740 /* Make sure space remains in fieldvals[] */
2741 if (fieldno >= maxfields)
2743 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2744 errmsg("extra data after last expected column")));
2746 /* Remember start of field on both input and output sides */
2747 start_ptr = cur_ptr;
2748 fieldvals[fieldno] = output_ptr;
2750 /* Scan data for field */
2756 if (cur_ptr >= line_end_ptr)
2766 if (cur_ptr >= line_end_ptr)
/*
 * Octal value: further digits are folded into val three bits at a time,
 * and each one is looked at only if input remains on the line.
 */
2784 if (cur_ptr < line_end_ptr)
2790 val = (val << 3) + OCTVALUE(c);
2791 if (cur_ptr < line_end_ptr)
2797 val = (val << 3) + OCTVALUE(c);
/*
 * Hex value: the next input character is decoded only if isxdigit()
 * accepts it; a second hex digit, when present, is combined with the
 * first.
 */
2807 if (cur_ptr < line_end_ptr)
2809 char hexchar = *cur_ptr;
2811 if (isxdigit((unsigned char) hexchar))
2813 int val = GetDecimalFromHex(hexchar);
2816 if (cur_ptr < line_end_ptr)
2819 if (isxdigit((unsigned char) hexchar))
/* The first digit becomes the high nibble, the second the low nibble. */
2822 val = (val << 4) + GetDecimalFromHex(hexchar);
2849 * in all other cases, take the char after '\'
2855 /* Add c to output string */
2859 /* Terminate attribute value in output area */
2860 *output_ptr++ = '\0';
2862 /* Check whether raw input matched null marker */
2863 input_len = end_ptr - start_ptr;
2864 if (input_len == cstate->null_print_len &&
2865 strncmp(start_ptr, cstate->null_print, input_len) == 0)
2866 fieldvals[fieldno] = NULL;
2869 /* Done if we hit EOL instead of a delim */
2874 /* Clean up state of attribute_buf */
2876 Assert(*output_ptr == '\0');
2877 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
2883 * Parse the current line into separate attributes (fields),
2884 * performing de-escaping as needed. This has exactly the same API as
2885 * CopyReadAttributesText, except we parse the fields according to
2886 * "standard" (i.e. common) CSV usage.
/*
 * Implementation notes for CopyReadAttributesCSV:
 *
 * - The delimiter, quote and escape characters are the first bytes of
 *   cstate->delim, cstate->quote and cstate->escape respectively.
 * - Buffer handling is the same as in the text-format parser: input comes
 *   from cstate->line_buf, output goes to cstate->attribute_buf through
 *   output_ptr, and each fieldvals[] entry points into the output area.
 * - Per-field state is kept in found_delim, in_quote and saw_quote, all
 *   of which start out false for every field.
 * - A delimiter ends the field only while in_quote is false.  The escape
 *   character is honored only while in_quote is true.
 * - Finishing a field while still in quote mode is reported as
 *   "unterminated CSV quoted field" (ERRCODE_BAD_COPY_FILE_FORMAT).
 * - The null-marker comparison is skipped for any field whose saw_quote
 *   flag is set, so such a field is never turned into NULL.
 */
2889 CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
2891 char delimc = cstate->delim[0];
2892 char quotec = cstate->quote[0];
2893 char escapec = cstate->escape[0];
2900 * We need a special case for zero-column tables: check that the input
2901 * line is empty, and return.
2905 if (cstate->line_buf.len != 0)
2907 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2908 errmsg("extra data after last expected column")));
2912 /* reset attribute_buf to empty */
2913 cstate->attribute_buf.len = 0;
2914 cstate->attribute_buf.data[0] = '\0';
2917 * The de-escaped attributes will certainly not be longer than the input
2918 * data line, so we can just force attribute_buf to be large enough and
2919 * then transfer data without any checks for enough space. We need to
2920 * do it this way because enlarging attribute_buf mid-stream would
2921 * invalidate pointers already stored into fieldvals[].
/* Grow up front whenever the buffer is not strictly larger than the line. */
2923 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
2924 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
2925 output_ptr = cstate->attribute_buf.data;
2927 /* set pointer variables for loop */
2928 cur_ptr = cstate->line_buf.data;
2929 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
2931 /* Outer loop iterates over fields */
2935 bool found_delim = false;
2936 bool in_quote = false;
2937 bool saw_quote = false;
2942 /* Make sure space remains in fieldvals[] */
2943 if (fieldno >= maxfields)
2945 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2946 errmsg("extra data after last expected column")));
2948 /* Remember start of field on both input and output sides */
2949 start_ptr = cur_ptr;
2950 fieldvals[fieldno] = output_ptr;
2952 /* Scan data for field */
2958 if (cur_ptr >= line_end_ptr)
2961 /* unquoted field delimiter */
2962 if (c == delimc && !in_quote)
2967 /* start of quoted field (or part of field) */
2968 if (c == quotec && !in_quote)
2974 /* escape within a quoted field */
2975 if (c == escapec && in_quote)
2978 * peek at the next char if available, and escape it if it is
2979 * an escape char or a quote char
2981 if (cur_ptr < line_end_ptr)
2983 char nextc = *cur_ptr;
2985 if (nextc == escapec || nextc == quotec)
/* Emit the escaped character itself; the escape byte is not copied. */
2987 *output_ptr++ = nextc;
2994 * end of quoted field. Must do this test after testing for escape
2995 * in case quote char and escape char are the same (which is the
2998 if (c == quotec && in_quote)
3004 /* Add c to output string */
3008 /* Terminate attribute value in output area */
3009 *output_ptr++ = '\0';
3011 /* Shouldn't still be in quote mode */
3014 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3015 errmsg("unterminated CSV quoted field")));
3017 /* Check whether raw input matched null marker */
3018 input_len = end_ptr - start_ptr;
/* Fields with saw_quote set are exempt from null-marker matching. */
3019 if (!saw_quote && input_len == cstate->null_print_len &&
3020 strncmp(start_ptr, cstate->null_print, input_len) == 0)
3021 fieldvals[fieldno] = NULL;
3024 /* Done if we hit EOL instead of a delim */
3029 /* Clean up state of attribute_buf */
3031 Assert(*output_ptr == '\0');
3032 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3039 * Read a binary attribute
/*
 * Implementation notes for CopyReadBinaryAttribute:
 *
 * - A 32-bit field size is read first with CopyGetInt32(); failure to
 *   read it is reported as "unexpected EOF in COPY data".
 * - The field payload is read directly into cstate->attribute_buf with
 *   CopyGetData(), asking for exactly fld_size bytes; anything other than
 *   fld_size bytes coming back is also reported as unexpected EOF.
 * - The buffer is then passed by address to the function described by
 *   flinfo, together with typioparam and typmod, via FunctionCall3().
 * - After that call attribute_buf.cursor must equal attribute_buf.len,
 *   otherwise "incorrect binary data format" is raised with
 *   ERRCODE_INVALID_BINARY_REPRESENTATION.
 *
 * All other errors raised here use ERRCODE_BAD_COPY_FILE_FORMAT.
 */
3042 CopyReadBinaryAttribute(CopyState cstate,
3043 int column_no, FmgrInfo *flinfo,
3044 Oid typioparam, int32 typmod,
3050 if (!CopyGetInt32(cstate, &fld_size))
3052 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3053 errmsg("unexpected EOF in COPY data")));
3061 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3062 errmsg("invalid field size")));
3064 /* reset attribute_buf to empty, and load raw data in it */
3065 cstate->attribute_buf.len = 0;
3066 cstate->attribute_buf.data[0] = '\0';
/* cursor is rewound too: it is compared against len after conversion. */
3067 cstate->attribute_buf.cursor = 0;
/* Request capacity for the payload before reading straight into data. */
3069 enlargeStringInfo(&cstate->attribute_buf, fld_size);
3071 if (CopyGetData(cstate, cstate->attribute_buf.data,
3072 fld_size, fld_size) != fld_size)
3074 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3075 errmsg("unexpected EOF in COPY data")));
/* Record the payload length and place a NUL byte right after it. */
3077 cstate->attribute_buf.len = fld_size;
3078 cstate->attribute_buf.data[fld_size] = '\0';
3080 /* Call the column type's binary input converter */
3081 result = FunctionCall3(flinfo,
3082 PointerGetDatum(&cstate->attribute_buf),
3083 ObjectIdGetDatum(typioparam),
3084 Int32GetDatum(typmod));
3086 /* Trouble if it didn't eat the whole buffer */
3087 if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
3089 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
3090 errmsg("incorrect binary data format")));
3097 * Send text representation of one attribute, with conversion and escaping
/*
 * Implementation notes for CopyAttributeOutText:
 *
 * - server_string is a NUL-terminated string.  When
 *   cstate->need_transcoding is set it is first passed through
 *   pg_server_to_client(); otherwise it is sent as is.
 * - The string is walked from the start to the terminating NUL, advancing
 *   by mblen bytes per step.
 * - Two-character backslash sequences are emitted for backspace, form
 *   feed, newline, carriage return, tab, vertical tab and backslash.
 * - Other data is passed to CopySendData() mblen bytes at a time; mblen
 *   comes from pg_encoding_mblen() only when cstate->client_only_encoding
 *   is set.
 */
3100 CopyAttributeOutText(CopyState cstate, char *server_string)
3104 char delimc = cstate->delim[0];
3107 if (cstate->need_transcoding)
3108 string = pg_server_to_client(server_string, strlen(server_string));
3110 string = server_string;
3112 for (; (c = *string) != '\0'; string += mblen)
3119 CopySendString(cstate, "\\b");
3122 CopySendString(cstate, "\\f");
3125 CopySendString(cstate, "\\n");
3128 CopySendString(cstate, "\\r");
3131 CopySendString(cstate, "\\t");
3134 CopySendString(cstate, "\\v");
3137 CopySendString(cstate, "\\\\");
/*
 * NOTE(review): a lone backslash is sent here; presumably this prefixes a
 * data byte equal to delimc -- confirm against the guarding condition.
 */
3141 CopySendChar(cstate, '\\');
3144 * We can skip pg_encoding_mblen() overhead when encoding
3145 * is safe, because in valid backend encodings, extra
3146 * bytes of a multibyte character never look like ASCII.
3148 if (cstate->client_only_encoding)
3149 mblen = pg_encoding_mblen(cstate->client_encoding, string);
3150 CopySendData(cstate, string, mblen);
3157 * Send CSV representation of one attribute, with conversion and
/*
 * Implementation notes for CopyAttributeOutCSV:
 *
 * - The delimiter, quote and escape characters are the first bytes of
 *   cstate->delim, cstate->quote and cstate->escape respectively.
 * - The null_print comparison is made with strcmp() on server_string,
 *   that is, before any pg_server_to_client() conversion.
 * - The first pass over the string looks for a delimiter, a quote
 *   character, a newline or a carriage return.
 * - The second pass sends the data.  While use_quote is set, every quote
 *   or escape character is preceded by escapec.
 * - In both passes pg_encoding_mblen() is consulted only when
 *   cstate->client_only_encoding is set.
 * - quotec is sent once before and once after the data.
 *   NOTE(review): presumably both sends are conditional on use_quote --
 *   confirm against the guarding conditions.
 */
3161 CopyAttributeOutCSV(CopyState cstate, char *server_string,
3166 char delimc = cstate->delim[0];
3167 char quotec = cstate->quote[0];
3168 char escapec = cstate->escape[0];
3172 /* force quoting if it matches null_print */
3173 if (!use_quote && strcmp(server_string, cstate->null_print) == 0)
3176 if (cstate->need_transcoding)
3177 string = pg_server_to_client(server_string, strlen(server_string));
3179 string = server_string;
3182 * have to run through the string twice, first time to see if it needs
3183 * quoting, second to actually send it
/* Pass 1: scan a separate cursor (tstring) so that string stays put. */
3187 for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
3189 if (c == delimc || c == quotec || c == '\n' || c == '\r')
3194 if (cstate->client_only_encoding)
3195 mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
3202 CopySendChar(cstate, quotec);
/* Pass 2: emit the data, escaping quote and escape characters. */
3204 for (; (c = *string) != '\0'; string += mblen)
3206 if (use_quote && (c == quotec || c == escapec))
3207 CopySendChar(cstate, escapec);
3208 if (cstate->client_only_encoding)
3209 mblen = pg_encoding_mblen(cstate->client_encoding, string);
3212 CopySendData(cstate, string, mblen);
3216 CopySendChar(cstate, quotec);
3220 * CopyGetAttnums - build an integer list of attnums to be copied
3222 * The input attnamelist is either the user-specified column list,
3223 * or NIL if there was none (in which case we want all the non-dropped
/*
 * Implementation notes for CopyGetAttnums:
 *
 * - The result list starts out as NIL and is built with lappend_int().
 * - With no column list (attnamelist == NIL) the tuple descriptor of rel
 *   is walked and each attribute is tested for attisdropped.  Attribute
 *   numbers are stored 1-based (array index plus one).
 * - With a column list, each name is resolved by attnameAttNum(), and a
 *   number that is already in the list is reported as
 *   ERRCODE_DUPLICATE_COLUMN, naming the offending column.
 */
3227 CopyGetAttnums(Relation rel, List *attnamelist)
3229 List *attnums = NIL;
3231 if (attnamelist == NIL)
3233 /* Generate default column list */
3234 TupleDesc tupDesc = RelationGetDescr(rel);
3235 Form_pg_attribute *attr = tupDesc->attrs;
3236 int attr_count = tupDesc->natts;
3239 for (i = 0; i < attr_count; i++)
3241 if (attr[i]->attisdropped)
/* attr[] is indexed from zero, attribute numbers count from one. */
3243 attnums = lappend_int(attnums, i + 1);
3248 /* Validate the user-supplied list and extract attnums */
3251 foreach(l, attnamelist)
3253 char *name = strVal(lfirst(l));
3256 /* Lookup column name, ereport on failure */
3257 /* Note we disallow system columns here */
3258 attnum = attnameAttNum(rel, name, false);
3259 /* Check for duplicates */
3260 if (list_member_int(attnums, attnum))
3262 (errcode(ERRCODE_DUPLICATE_COLUMN),
3263 errmsg("column \"%s\" specified more than once",
3265 attnums = lappend_int(attnums, attnum);