]> granicus.if.org Git - postgresql/blob - src/backend/commands/copy.c
Add a materialized view relations.
[postgresql] / src / backend / commands / copy.c
1 /*-------------------------------------------------------------------------
2  *
3  * copy.c
4  *              Implements the COPY utility command
5  *
6  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/commands/copy.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <ctype.h>
18 #include <unistd.h>
19 #include <sys/stat.h>
20 #include <netinet/in.h>
21 #include <arpa/inet.h>
22
23 #include "access/heapam.h"
24 #include "access/htup_details.h"
25 #include "access/sysattr.h"
26 #include "access/xact.h"
27 #include "catalog/namespace.h"
28 #include "catalog/pg_type.h"
29 #include "commands/copy.h"
30 #include "commands/defrem.h"
31 #include "commands/trigger.h"
32 #include "executor/executor.h"
33 #include "libpq/libpq.h"
34 #include "libpq/pqformat.h"
35 #include "mb/pg_wchar.h"
36 #include "miscadmin.h"
37 #include "optimizer/clauses.h"
38 #include "optimizer/planner.h"
39 #include "parser/parse_relation.h"
40 #include "rewrite/rewriteHandler.h"
41 #include "storage/fd.h"
42 #include "tcop/tcopprot.h"
43 #include "utils/acl.h"
44 #include "utils/builtins.h"
45 #include "utils/lsyscache.h"
46 #include "utils/memutils.h"
47 #include "utils/portal.h"
48 #include "utils/rel.h"
49 #include "utils/snapmgr.h"
50
51
52 #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))       /* true if c is one of the characters '0'..'7' */
53 #define OCTVALUE(c) ((c) - '0')         /* numeric value of digit character c; evaluates c once */
54
55 /*
56  * Identifies the lowest-level data source/destination of a COPY.  The
57  * CopySendEndOfRow and CopyGetData routines switch on this value.
58  */
59 typedef enum CopyDest
60 {
61         COPY_FILE,                                      /* to/from file (or a piped program) */
62         COPY_OLD_FE,                            /* to/from frontend (2.0 protocol) */
63         COPY_NEW_FE                                     /* to/from frontend (3.0 protocol) */
64 } CopyDest;
65
66 /*
67  * Represents the end-of-line terminator type of the input
68  */
69 typedef enum EolType
70 {
71         EOL_UNKNOWN,            /* terminator type not known */
72         EOL_NL,                 /* newline */
73         EOL_CR,                 /* carriage return */
74         EOL_CRNL                /* carriage return followed by newline */
75 } EolType;
76
77 /*
78  * This struct contains all the state variables used throughout a COPY
79  * operation. For simplicity, we use the same struct for all variants of COPY,
80  * even though some fields are used in only some cases.
81  *
82  * Multi-byte encodings: all supported client-side encodings encode multi-byte
83  * characters by having the first byte's high bit set. Subsequent bytes of the
84  * character can have the high bit not set. When scanning data in such an
85  * encoding to look for a match to a single-byte (i.e. ASCII) character, we
86  * must use the full pg_encoding_mblen() machinery to skip over multibyte
87  * characters, else we might find a false match to a trailing byte. In
88  * supported server encodings, there is no possibility of a false match, and
89  * it's faster to make useless comparisons to trailing bytes than it is to
90  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
91  * when we have to do it the hard way.
92  */
93 typedef struct CopyStateData
94 {
95         /* low-level state data */
96         CopyDest        copy_dest;              /* type of copy source/destination */
97         FILE       *copy_file;          /* used if copy_dest == COPY_FILE */
98         StringInfo      fe_msgbuf;              /* used for all dests during COPY TO, only for
99                                                                  * dest == COPY_NEW_FE in COPY FROM */
100         bool            fe_eof;                 /* true if detected end of copy data */
101         EolType         eol_type;               /* EOL type of input */
102         int                     file_encoding;  /* file or remote side's character encoding */
103         bool            need_transcoding;               /* file encoding diff from server? */
104         bool            encoding_embeds_ascii;  /* ASCII can be non-first byte? */
105
106         /* parameters from the COPY command */
107         Relation        rel;                    /* relation to copy to or from */
108         QueryDesc  *queryDesc;          /* executable query to copy from */
109         List       *attnumlist;         /* integer list of attnums to copy */
110         char       *filename;           /* filename, or NULL for STDIN/STDOUT */
111         bool            is_program;             /* is 'filename' a program to popen? */
112         bool            binary;                 /* binary format? */
113         bool            oids;                   /* include OIDs? */
114         bool            freeze;                 /* freeze rows on loading? */
115         bool            csv_mode;               /* Comma Separated Value format? */
116         bool            header_line;    /* CSV header line? */
117         char       *null_print;         /* NULL marker string (server encoding!) */
118         int                     null_print_len; /* length of same */
119         char       *null_print_client;          /* same converted to file encoding */
120         char       *delim;                      /* column delimiter (must be 1 byte) */
121         char       *quote;                      /* CSV quote char (must be 1 byte) */
122         char       *escape;                     /* CSV escape char (must be 1 byte) */
123         List       *force_quote;        /* list of column names */
124         bool            force_quote_all;        /* FORCE QUOTE *? */
125         bool       *force_quote_flags;          /* per-column CSV FQ flags */
126         List       *force_notnull;      /* list of column names */
127         bool       *force_notnull_flags;        /* per-column CSV FNN flags */
128         bool            convert_selectively;    /* do selective binary conversion? */
129         List       *convert_select;     /* list of column names (can be NIL) */
130         bool       *convert_select_flags;       /* per-column CSV/TEXT CS flags */
131
132         /* these are just for error messages, see CopyFromErrorCallback */
133         const char *cur_relname;        /* table name for error messages */
134         int                     cur_lineno;             /* line number for error messages */
135         const char *cur_attname;        /* current att for error messages */
136         const char *cur_attval;         /* current att value for error messages */
137
138         /*
139          * Working state for COPY TO/FROM
140          */
141         MemoryContext copycontext;      /* per-copy execution context */
142
143         /*
144          * Working state for COPY TO
145          */
146         FmgrInfo   *out_functions;      /* lookup info for output functions */
147         MemoryContext rowcontext;       /* per-row evaluation context */
148
149         /*
150          * Working state for COPY FROM
151          */
152         AttrNumber      num_defaults;   /* presumably # of entries in defmap/defexprs -- TODO confirm */
153         bool            file_has_oids;  /* presumably: does the input carry OIDs? -- TODO confirm */
154         FmgrInfo        oid_in_function;        /* presumably input function for OIDs -- TODO confirm */
155         Oid                     oid_typioparam; /* presumably typioparam for oid_in_function -- TODO confirm */
156         FmgrInfo   *in_functions;       /* array of input functions for the attributes */
157         Oid                *typioparams;        /* array of element types for in_functions */
158         int                *defmap;                     /* array of default att numbers */
159         ExprState **defexprs;           /* array of default att expressions */
160         bool            volatile_defexprs;              /* is any of defexprs volatile? */
161
162         /*
163          * These variables are used to reduce overhead in textual COPY FROM.
164          *
165          * attribute_buf holds the separated, de-escaped text for each field of
166          * the current line.  The CopyReadAttributes functions return arrays of
167          * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
168          * the buffer on each cycle.
169          */
170         StringInfoData attribute_buf;
171
172         /* field raw data pointers found by COPY FROM */
173
174         int                     max_fields;     /* presumably allocated length of raw_fields -- TODO confirm */
175         char      **raw_fields;         /* the field pointers themselves */
176
177         /*
178          * Similarly, line_buf holds the whole input line being processed. The
179          * input cycle is first to read the whole line into line_buf, convert it
180          * to server encoding there, and then extract the individual attribute
181          * fields into attribute_buf.  line_buf is preserved unmodified so that we
182          * can display it in error messages if appropriate.
183          */
184         StringInfoData line_buf;
185         bool            line_buf_converted;             /* converted to server encoding? */
186
187         /*
188          * Finally, raw_buf holds raw data read from the data source (file or
189          * client connection).  CopyReadLine parses this data sufficiently to
190          * locate line boundaries, then transfers the data to line_buf and
191          * converts it.  Note: we guarantee that there is a \0 at
192          * raw_buf[raw_buf_len].
193          */
194 #define RAW_BUF_SIZE 65536              /* we palloc RAW_BUF_SIZE+1 bytes */
195         char       *raw_buf;
196         int                     raw_buf_index;  /* next byte to process */
197         int                     raw_buf_len;    /* total # of bytes stored */
198 } CopyStateData;
199
200 /* DestReceiver for COPY (SELECT) TO; bundles the COPY state with a row counter */
201 typedef struct
202 {
203         DestReceiver pub;                       /* publicly-known function pointers (first member) */
204         CopyState       cstate;                 /* CopyStateData for the command */
205         uint64          processed;              /* # of tuples processed */
206 } DR_copy;
207
208
209 /*
210  * These macros centralize code used to process line_buf and raw_buf buffers.
211  * They are macros because they often do continue/break control and to avoid
212  * function call overhead in tight COPY loops.
213  *
214  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
215  * prevent the continue/break processing from working.  We end the "if (1)"
216  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
217  * any "else" in the calling code, and to avoid any compiler warnings about
218  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
219  */
220
221 /*
222  * If raw_buf_ptr + (extralen) reaches copy_buf_len and EOF has not been hit,
223  * rewind raw_buf_ptr to prev_raw_ptr, set need_data, and "continue" the
 * enclosing loop.  The rewind keeps the character read at the top of the
 * loop in the buffer even if there is more than one read-ahead.
 *
 * Expects raw_buf_ptr, prev_raw_ptr, copy_buf_len, hit_eof and need_data to
 * be variables visible where the macro is expanded.
224  */
225 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
226 if (1) \
227 { \
228         if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
229         { \
230                 raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
231                 need_data = true; \
232                 continue; \
233         } \
234 } else ((void) 0)
235
236 /*
 * This consumes the remainder of the buffer and breaks: when
 * raw_buf_ptr + (extralen) reaches copy_buf_len and hit_eof is set, it
 * advances raw_buf_ptr to copy_buf_len (only if extralen is nonzero), sets
 * result to true, and executes "break".
 *
 * Expects raw_buf_ptr, copy_buf_len, hit_eof and result to be variables
 * visible where the macro is expanded.
 */
237 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
238 if (1) \
239 { \
240         if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
241         { \
242                 if (extralen) \
243                         raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
244                 /* backslash just before EOF, treat as data char */ \
245                 result = true; \
246                 break; \
247         } \
248 } else ((void) 0)
249
250 /*
251  * Transfer any approved data to line_buf; must do this to be sure
252  * there is some room in raw_buf.
 *
 * Appends the bytes of cstate->raw_buf between cstate->raw_buf_index and
 * raw_buf_ptr to cstate->line_buf, then advances cstate->raw_buf_index to
 * raw_buf_ptr.  Does nothing when raw_buf_ptr is not past raw_buf_index.
253  */
254 #define REFILL_LINEBUF \
255 if (1) \
256 { \
257         if (raw_buf_ptr > cstate->raw_buf_index) \
258         { \
259                 appendBinaryStringInfo(&cstate->line_buf, \
260                                                          cstate->raw_buf + cstate->raw_buf_index, \
261                                                            raw_buf_ptr - cstate->raw_buf_index); \
262                 cstate->raw_buf_index = raw_buf_ptr; \
263         } \
264 } else ((void) 0)
265
266 /*
 * Undo any read-ahead and jump out of the block: reposition raw_buf_ptr
 * just past the character at prev_raw_ptr, then goto the not_end_of_copy
 * label, which must exist in the function using this macro.
 */
267 #define NO_END_OF_COPY_GOTO \
268 if (1) \
269 { \
270         raw_buf_ptr = prev_raw_ptr + 1; \
271         goto not_end_of_copy; \
272 } else ((void) 0)
273
/*
 * 11-byte signature; the array size leaves no room for the literal's
 * implicit terminating NUL, so this is not a C string.
 * NOTE(review): presumably the header signature of binary-format COPY
 * data -- confirm against the code that uses it.
 */
274 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
275
276
277 /* prototypes for non-exported (static, file-local) functions */
278 static CopyState BeginCopy(bool is_from, Relation rel, Node *raw_query,
279                   const char *queryString, List *attnamelist, List *options);
280 static void EndCopy(CopyState cstate);
281 static void ClosePipeToProgram(CopyState cstate);
282 static CopyState BeginCopyTo(Relation rel, Node *query, const char *queryString,
283                         const char *filename, bool is_program, List *attnamelist,
284                         List *options);
285 static void EndCopyTo(CopyState cstate);
286 static uint64 DoCopyTo(CopyState cstate);
287 static uint64 CopyTo(CopyState cstate);
288 static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
289                          Datum *values, bool *nulls);
290 static uint64 CopyFrom(CopyState cstate);
291 static void CopyFromInsertBatch(CopyState cstate, EState *estate,
292                                         CommandId mycid, int hi_options,
293                                         ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
294                                         BulkInsertState bistate,
295                                         int nBufferedTuples, HeapTuple *bufferedTuples);
296 static bool CopyReadLine(CopyState cstate);
297 static bool CopyReadLineText(CopyState cstate);
298 static int      CopyReadAttributesText(CopyState cstate);
299 static int      CopyReadAttributesCSV(CopyState cstate);
300 static Datum CopyReadBinaryAttribute(CopyState cstate,
301                                                 int column_no, FmgrInfo *flinfo,
302                                                 Oid typioparam, int32 typmod,
303                                                 bool *isnull);
304 static void CopyAttributeOutText(CopyState cstate, char *string);
305 static void CopyAttributeOutCSV(CopyState cstate, char *string,
306                                         bool use_quote, bool single_attr);
307 static List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
308                            List *attnamelist);
309 static char *limit_printout_length(const char *str);
310
311 /* Low-level communications functions: move raw COPY data to/from the file or frontend */
312 static void SendCopyBegin(CopyState cstate);
313 static void ReceiveCopyBegin(CopyState cstate);
314 static void SendCopyEnd(CopyState cstate);
315 static void CopySendData(CopyState cstate, const void *databuf, int datasize);
316 static void CopySendString(CopyState cstate, const char *str);
317 static void CopySendChar(CopyState cstate, char c);
318 static void CopySendEndOfRow(CopyState cstate);
319 static int CopyGetData(CopyState cstate, void *databuf,
320                         int minread, int maxread);
321 static void CopySendInt32(CopyState cstate, int32 val);
322 static bool CopyGetInt32(CopyState cstate, int32 *val);
323 static void CopySendInt16(CopyState cstate, int16 val);
324 static bool CopyGetInt16(CopyState cstate, int16 *val);
325
326
327 /*
328  * Send copy start/stop messages for frontend copies.  These have changed
329  * in past protocol redesigns.
330  */
331 static void
332 SendCopyBegin(CopyState cstate)
333 {
334         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
335         {
336                 /* new way */
337                 StringInfoData buf;
338                 int                     natts = list_length(cstate->attnumlist);
339                 int16           format = (cstate->binary ? 1 : 0);
340                 int                     i;
341
342                 pq_beginmessage(&buf, 'H');
343                 pq_sendbyte(&buf, format);              /* overall format */
344                 pq_sendint(&buf, natts, 2);
345                 for (i = 0; i < natts; i++)
346                         pq_sendint(&buf, format, 2);            /* per-column formats */
347                 pq_endmessage(&buf);
348                 cstate->copy_dest = COPY_NEW_FE;
349         }
350         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
351         {
352                 /* old way */
353                 if (cstate->binary)
354                         ereport(ERROR,
355                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
356                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
357                 pq_putemptymessage('H');
358                 /* grottiness needed for old COPY OUT protocol */
359                 pq_startcopyout();
360                 cstate->copy_dest = COPY_OLD_FE;
361         }
362         else
363         {
364                 /* very old way */
365                 if (cstate->binary)
366                         ereport(ERROR,
367                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
368                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
369                 pq_putemptymessage('B');
370                 /* grottiness needed for old COPY OUT protocol */
371                 pq_startcopyout();
372                 cstate->copy_dest = COPY_OLD_FE;
373         }
374 }
375
376 static void
377 ReceiveCopyBegin(CopyState cstate)
378 {
379         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
380         {
381                 /* new way */
382                 StringInfoData buf;
383                 int                     natts = list_length(cstate->attnumlist);
384                 int16           format = (cstate->binary ? 1 : 0);
385                 int                     i;
386
387                 pq_beginmessage(&buf, 'G');
388                 pq_sendbyte(&buf, format);              /* overall format */
389                 pq_sendint(&buf, natts, 2);
390                 for (i = 0; i < natts; i++)
391                         pq_sendint(&buf, format, 2);            /* per-column formats */
392                 pq_endmessage(&buf);
393                 cstate->copy_dest = COPY_NEW_FE;
394                 cstate->fe_msgbuf = makeStringInfo();
395         }
396         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
397         {
398                 /* old way */
399                 if (cstate->binary)
400                         ereport(ERROR,
401                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
402                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
403                 pq_putemptymessage('G');
404                 cstate->copy_dest = COPY_OLD_FE;
405         }
406         else
407         {
408                 /* very old way */
409                 if (cstate->binary)
410                         ereport(ERROR,
411                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
412                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
413                 pq_putemptymessage('D');
414                 cstate->copy_dest = COPY_OLD_FE;
415         }
416         /* We *must* flush here to ensure FE knows it can send. */
417         pq_flush();
418 }
419
420 static void
421 SendCopyEnd(CopyState cstate)
422 {
423         if (cstate->copy_dest == COPY_NEW_FE)
424         {
425                 /* Shouldn't have any unsent data */
426                 Assert(cstate->fe_msgbuf->len == 0);
427                 /* Send Copy Done message */
428                 pq_putemptymessage('c');
429         }
430         else
431         {
432                 CopySendData(cstate, "\\.", 2);
433                 /* Need to flush out the trailer (this also appends a newline) */
434                 CopySendEndOfRow(cstate);
435                 pq_endcopyout(false);
436         }
437 }
438
439 /*----------
440  * CopySendData sends output data to the destination (file or frontend)
441  * CopySendString does the same for null-terminated strings
442  * CopySendChar does the same for single characters
443  * CopySendEndOfRow does the appropriate thing at end of each data row
444  *      (data is not actually flushed except by CopySendEndOfRow)
445  *
446  * NB: no data conversion is applied by these functions
447  *----------
448  */
449 static void
450 CopySendData(CopyState cstate, const void *databuf, int datasize)
451 {
452         appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
453 }
454
455 static void
456 CopySendString(CopyState cstate, const char *str)
457 {
458         appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
459 }
460
461 static void
462 CopySendChar(CopyState cstate, char c)
463 {
464         appendStringInfoCharMacro(cstate->fe_msgbuf, c);
465 }
466
467 static void
468 CopySendEndOfRow(CopyState cstate)
469 {
470         StringInfo      fe_msgbuf = cstate->fe_msgbuf;
471
472         switch (cstate->copy_dest)
473         {
474                 case COPY_FILE:
475                         if (!cstate->binary)
476                         {
477                                 /* Default line termination depends on platform */
478 #ifndef WIN32
479                                 CopySendChar(cstate, '\n');
480 #else
481                                 CopySendString(cstate, "\r\n");
482 #endif
483                         }
484
485                         if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
486                                            cstate->copy_file) != 1 ||
487                                 ferror(cstate->copy_file))
488                         {
489                                 if (cstate->is_program)
490                                 {
491                                         if (errno == EPIPE)
492                                         {
493                                                 /*
494                                                  * The pipe will be closed automatically on error at
495                                                  * the end of transaction, but we might get a better
496                                                  * error message from the subprocess' exit code than
497                                                  * just "Broken Pipe"
498                                                  */
499                                                 ClosePipeToProgram(cstate);
500
501                                                 /*
502                                                  * If ClosePipeToProgram() didn't throw an error,
503                                                  * the program terminated normally, but closed the
504                                                  * pipe first. Restore errno, and throw an error.
505                                                  */
506                                                 errno = EPIPE;
507                                         }
508                                         ereport(ERROR,
509                                                         (errcode_for_file_access(),
510                                                          errmsg("could not write to COPY program: %m")));
511                                 }
512                                 else
513                                         ereport(ERROR,
514                                                         (errcode_for_file_access(),
515                                                          errmsg("could not write to COPY file: %m")));
516                         }
517                         break;
518                 case COPY_OLD_FE:
519                         /* The FE/BE protocol uses \n as newline for all platforms */
520                         if (!cstate->binary)
521                                 CopySendChar(cstate, '\n');
522
523                         if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
524                         {
525                                 /* no hope of recovering connection sync, so FATAL */
526                                 ereport(FATAL,
527                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
528                                                  errmsg("connection lost during COPY to stdout")));
529                         }
530                         break;
531                 case COPY_NEW_FE:
532                         /* The FE/BE protocol uses \n as newline for all platforms */
533                         if (!cstate->binary)
534                                 CopySendChar(cstate, '\n');
535
536                         /* Dump the accumulated row as one CopyData message */
537                         (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
538                         break;
539         }
540
541         resetStringInfo(fe_msgbuf);
542 }
543
544 /*
545  * CopyGetData reads data from the source (file or frontend)
546  *
547  * We attempt to read at least minread, and at most maxread, bytes from
548  * the source.  The actual number of bytes read is returned; if this is
549  * less than minread, EOF was detected.
550  *
551  * Note: when copying from the frontend, we expect a proper EOF mark per
552  * protocol; if the frontend simply drops the connection, we raise error.
553  * It seems unwise to allow the COPY IN to complete normally in that case.
554  *
555  * NB: no data conversion is applied here.
556  */
557 static int
558 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
559 {
560         int                     bytesread = 0;
561
562         switch (cstate->copy_dest)
563         {
564                 case COPY_FILE:
565                         bytesread = fread(databuf, 1, maxread, cstate->copy_file);
566                         if (ferror(cstate->copy_file))
567                                 ereport(ERROR,
568                                                 (errcode_for_file_access(),
569                                                  errmsg("could not read from COPY file: %m")));
570                         break;
571                 case COPY_OLD_FE:
572
573                         /*
574                          * We cannot read more than minread bytes (which in practice is 1)
575                          * because old protocol doesn't have any clear way of separating
576                          * the COPY stream from following data.  This is slow, but not any
577                          * slower than the code path was originally, and we don't care
578                          * much anymore about the performance of old protocol.
579                          */
580                         if (pq_getbytes((char *) databuf, minread))
581                         {
582                                 /* Only a \. terminator is legal EOF in old protocol */
583                                 ereport(ERROR,
584                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
585                                                  errmsg("unexpected EOF on client connection with an open transaction")));
586                         }
587                         bytesread = minread;
588                         break;
589                 case COPY_NEW_FE:
590                         while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
591                         {
592                                 int                     avail;
593
594                                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
595                                 {
596                                         /* Try to receive another message */
597                                         int                     mtype;
598
599                         readmessage:
600                                         mtype = pq_getbyte();
601                                         if (mtype == EOF)
602                                                 ereport(ERROR,
603                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
604                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
605                                         if (pq_getmessage(cstate->fe_msgbuf, 0))
606                                                 ereport(ERROR,
607                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
608                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
609                                         switch (mtype)
610                                         {
611                                                 case 'd':               /* CopyData */
612                                                         break;
613                                                 case 'c':               /* CopyDone */
614                                                         /* COPY IN correctly terminated by frontend */
615                                                         cstate->fe_eof = true;
616                                                         return bytesread;
617                                                 case 'f':               /* CopyFail */
618                                                         ereport(ERROR,
619                                                                         (errcode(ERRCODE_QUERY_CANCELED),
620                                                                          errmsg("COPY from stdin failed: %s",
621                                                                            pq_getmsgstring(cstate->fe_msgbuf))));
622                                                         break;
623                                                 case 'H':               /* Flush */
624                                                 case 'S':               /* Sync */
625
626                                                         /*
627                                                          * Ignore Flush/Sync for the convenience of client
628                                                          * libraries (such as libpq) that may send those
629                                                          * without noticing that the command they just
630                                                          * sent was COPY.
631                                                          */
632                                                         goto readmessage;
633                                                 default:
634                                                         ereport(ERROR,
635                                                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
636                                                                          errmsg("unexpected message type 0x%02X during COPY from stdin",
637                                                                                         mtype)));
638                                                         break;
639                                         }
640                                 }
641                                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
642                                 if (avail > maxread)
643                                         avail = maxread;
644                                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
645                                 databuf = (void *) ((char *) databuf + avail);
646                                 maxread -= avail;
647                                 bytesread += avail;
648                         }
649                         break;
650         }
651
652         return bytesread;
653 }
654
655
656 /*
657  * These functions do apply some data conversion
658  */
659
660 /*
661  * CopySendInt32 sends an int32 in network byte order
662  */
663 static void
664 CopySendInt32(CopyState cstate, int32 val)
665 {
666         uint32          buf;
667
668         buf = htonl((uint32) val);
669         CopySendData(cstate, &buf, sizeof(buf));
670 }
671
672 /*
673  * CopyGetInt32 reads an int32 that appears in network byte order
674  *
675  * Returns true if OK, false if EOF
676  */
677 static bool
678 CopyGetInt32(CopyState cstate, int32 *val)
679 {
680         uint32          buf;
681
682         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
683         {
684                 *val = 0;                               /* suppress compiler warning */
685                 return false;
686         }
687         *val = (int32) ntohl(buf);
688         return true;
689 }
690
691 /*
692  * CopySendInt16 sends an int16 in network byte order
693  */
694 static void
695 CopySendInt16(CopyState cstate, int16 val)
696 {
697         uint16          buf;
698
699         buf = htons((uint16) val);
700         CopySendData(cstate, &buf, sizeof(buf));
701 }
702
703 /*
704  * CopyGetInt16 reads an int16 that appears in network byte order
705  */
706 static bool
707 CopyGetInt16(CopyState cstate, int16 *val)
708 {
709         uint16          buf;
710
711         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
712         {
713                 *val = 0;                               /* suppress compiler warning */
714                 return false;
715         }
716         *val = (int16) ntohs(buf);
717         return true;
718 }
719
720
721 /*
722  * CopyLoadRawBuf loads some more data into raw_buf
723  *
724  * Returns TRUE if able to obtain at least one more byte, else FALSE.
725  *
726  * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
727  * down to the start of the buffer and then we load more data after that.
728  * This case is used only when a frontend multibyte character crosses a
729  * bufferload boundary.
730  */
731 static bool
732 CopyLoadRawBuf(CopyState cstate)
733 {
734         int                     nbytes;
735         int                     inbytes;
736
737         if (cstate->raw_buf_index < cstate->raw_buf_len)
738         {
739                 /* Copy down the unprocessed data */
740                 nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
741                 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
742                                 nbytes);
743         }
744         else
745                 nbytes = 0;                             /* no data need be saved */
746
747         inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
748                                                   1, RAW_BUF_SIZE - nbytes);
749         nbytes += inbytes;
750         cstate->raw_buf[nbytes] = '\0';
751         cstate->raw_buf_index = 0;
752         cstate->raw_buf_len = nbytes;
753         return (inbytes > 0);
754 }
755
756
757 /*
758  *       DoCopy executes the SQL COPY statement
759  *
760  * Either unload or reload contents of table <relation>, depending on <from>.
761  * (<from> = TRUE means we are inserting into the table.)  In the "TO" case
762  * we also support copying the output of an arbitrary SELECT query.
763  *
764  * If <pipe> is false, transfer is between the table and the file named
765  * <filename>.  Otherwise, transfer is between the table and our regular
766  * input/output stream. The latter could be either stdin/stdout or a
767  * socket, depending on whether we're running under Postmaster control.
768  *
769  * Do not allow a Postgres user without superuser privilege to read from
770  * or write to a file.
771  *
772  * Do not allow the copy if user doesn't have proper permission to access
773  * the table or the specifically requested columns.
774  */
775 Oid
776 DoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed)
777 {
778         CopyState       cstate;
779         bool            is_from = stmt->is_from;
780         bool            pipe = (stmt->filename == NULL);
781         Relation        rel;
782         Oid         relid;
783
784         /* Disallow COPY to/from file or program except to superusers. */
785         if (!pipe && !superuser())
786         {
787                 if (stmt->is_program)
788                         ereport(ERROR,
789                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
790                          errmsg("must be superuser to COPY to or from an external program"),
791                                          errhint("Anyone can COPY to stdout or from stdin. "
792                                                          "psql's \\copy command also works for anyone.")));
793                 else
794                         ereport(ERROR,
795                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
796                                          errmsg("must be superuser to COPY to or from a file"),
797                                          errhint("Anyone can COPY to stdout or from stdin. "
798                                                          "psql's \\copy command also works for anyone.")));
799         }
800
801         if (stmt->relation)
802         {
803                 TupleDesc       tupDesc;
804                 AclMode         required_access = (is_from ? ACL_INSERT : ACL_SELECT);
805                 RangeTblEntry *rte;
806                 List       *attnums;
807                 ListCell   *cur;
808
809                 Assert(!stmt->query);
810
811                 /* Open and lock the relation, using the appropriate lock type. */
812                 rel = heap_openrv(stmt->relation,
813                                                   (is_from ? RowExclusiveLock : AccessShareLock));
814
815                 relid = RelationGetRelid(rel);
816
817                 rte = makeNode(RangeTblEntry);
818                 rte->rtekind = RTE_RELATION;
819                 rte->relid = RelationGetRelid(rel);
820                 rte->relkind = rel->rd_rel->relkind;
821                 rte->requiredPerms = required_access;
822
823                 tupDesc = RelationGetDescr(rel);
824                 attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
825                 foreach(cur, attnums)
826                 {
827                         int                     attno = lfirst_int(cur) -
828                         FirstLowInvalidHeapAttributeNumber;
829
830                         if (is_from)
831                                 rte->modifiedCols = bms_add_member(rte->modifiedCols, attno);
832                         else
833                                 rte->selectedCols = bms_add_member(rte->selectedCols, attno);
834                 }
835                 ExecCheckRTPerms(list_make1(rte), true);
836         }
837         else
838         {
839                 Assert(stmt->query);
840
841                 relid = InvalidOid;
842                 rel = NULL;
843         }
844
845         if (is_from)
846         {
847                 Assert(rel);
848
849                 /* check read-only transaction */
850                 if (XactReadOnly && !rel->rd_islocaltemp)
851                         PreventCommandIfReadOnly("COPY FROM");
852
853                 cstate = BeginCopyFrom(rel, stmt->filename, stmt->is_program,
854                                                            stmt->attlist, stmt->options);
855                 *processed = CopyFrom(cstate);  /* copy from file to database */
856                 EndCopyFrom(cstate);
857         }
858         else
859         {
860                 cstate = BeginCopyTo(rel, stmt->query, queryString,
861                                                          stmt->filename, stmt->is_program,
862                                                          stmt->attlist, stmt->options);
863                 *processed = DoCopyTo(cstate);  /* copy from database to file */
864                 EndCopyTo(cstate);
865         }
866
867         /*
868          * Close the relation. If reading, we can release the AccessShareLock we
869          * got; if writing, we should hold the lock until end of transaction to
870          * ensure that updates will be committed before lock is released.
871          */
872         if (rel != NULL)
873                 heap_close(rel, (is_from ? NoLock : AccessShareLock));
874
875         return relid;
876 }
877
878 /*
879  * Process the statement option list for COPY.
880  *
881  * Scan the options list (a list of DefElem) and transpose the information
882  * into cstate, applying appropriate error checking.
883  *
884  * cstate is assumed to be filled with zeroes initially.
885  *
886  * This is exported so that external users of the COPY API can sanity-check
887  * a list of options.  In that usage, cstate should be passed as NULL
888  * (since external users don't know sizeof(CopyStateData)) and the collected
889  * data is just leaked until CurrentMemoryContext is reset.
890  *
891  * Note that additional checking, such as whether column names listed in FORCE
892  * QUOTE actually exist, has to be applied later.  This just checks for
893  * self-consistency of the options list.
894  */
895 void
896 ProcessCopyOptions(CopyState cstate,
897                                    bool is_from,
898                                    List *options)
899 {
900         bool            format_specified = false;
901         ListCell   *option;
902
903         /* Support external use for option sanity checking */
904         if (cstate == NULL)
905                 cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
906
907         cstate->file_encoding = -1;
908
909         /* Extract options from the statement node tree */
910         foreach(option, options)
911         {
912                 DefElem    *defel = (DefElem *) lfirst(option);
913
914                 if (strcmp(defel->defname, "format") == 0)
915                 {
916                         char       *fmt = defGetString(defel);
917
918                         if (format_specified)
919                                 ereport(ERROR,
920                                                 (errcode(ERRCODE_SYNTAX_ERROR),
921                                                  errmsg("conflicting or redundant options")));
922                         format_specified = true;
923                         if (strcmp(fmt, "text") == 0)
924                                  /* default format */ ;
925                         else if (strcmp(fmt, "csv") == 0)
926                                 cstate->csv_mode = true;
927                         else if (strcmp(fmt, "binary") == 0)
928                                 cstate->binary = true;
929                         else
930                                 ereport(ERROR,
931                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
932                                                  errmsg("COPY format \"%s\" not recognized", fmt)));
933                 }
934                 else if (strcmp(defel->defname, "oids") == 0)
935                 {
936                         if (cstate->oids)
937                                 ereport(ERROR,
938                                                 (errcode(ERRCODE_SYNTAX_ERROR),
939                                                  errmsg("conflicting or redundant options")));
940                         cstate->oids = defGetBoolean(defel);
941                 }
942                 else if (strcmp(defel->defname, "freeze") == 0)
943                 {
944                         if (cstate->freeze)
945                                 ereport(ERROR,
946                                                 (errcode(ERRCODE_SYNTAX_ERROR),
947                                                  errmsg("conflicting or redundant options")));
948                         cstate->freeze = defGetBoolean(defel);
949                 }
950                 else if (strcmp(defel->defname, "delimiter") == 0)
951                 {
952                         if (cstate->delim)
953                                 ereport(ERROR,
954                                                 (errcode(ERRCODE_SYNTAX_ERROR),
955                                                  errmsg("conflicting or redundant options")));
956                         cstate->delim = defGetString(defel);
957                 }
958                 else if (strcmp(defel->defname, "null") == 0)
959                 {
960                         if (cstate->null_print)
961                                 ereport(ERROR,
962                                                 (errcode(ERRCODE_SYNTAX_ERROR),
963                                                  errmsg("conflicting or redundant options")));
964                         cstate->null_print = defGetString(defel);
965                 }
966                 else if (strcmp(defel->defname, "header") == 0)
967                 {
968                         if (cstate->header_line)
969                                 ereport(ERROR,
970                                                 (errcode(ERRCODE_SYNTAX_ERROR),
971                                                  errmsg("conflicting or redundant options")));
972                         cstate->header_line = defGetBoolean(defel);
973                 }
974                 else if (strcmp(defel->defname, "quote") == 0)
975                 {
976                         if (cstate->quote)
977                                 ereport(ERROR,
978                                                 (errcode(ERRCODE_SYNTAX_ERROR),
979                                                  errmsg("conflicting or redundant options")));
980                         cstate->quote = defGetString(defel);
981                 }
982                 else if (strcmp(defel->defname, "escape") == 0)
983                 {
984                         if (cstate->escape)
985                                 ereport(ERROR,
986                                                 (errcode(ERRCODE_SYNTAX_ERROR),
987                                                  errmsg("conflicting or redundant options")));
988                         cstate->escape = defGetString(defel);
989                 }
990                 else if (strcmp(defel->defname, "force_quote") == 0)
991                 {
992                         if (cstate->force_quote || cstate->force_quote_all)
993                                 ereport(ERROR,
994                                                 (errcode(ERRCODE_SYNTAX_ERROR),
995                                                  errmsg("conflicting or redundant options")));
996                         if (defel->arg && IsA(defel->arg, A_Star))
997                                 cstate->force_quote_all = true;
998                         else if (defel->arg && IsA(defel->arg, List))
999                                 cstate->force_quote = (List *) defel->arg;
1000                         else
1001                                 ereport(ERROR,
1002                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1003                                                  errmsg("argument to option \"%s\" must be a list of column names",
1004                                                                 defel->defname)));
1005                 }
1006                 else if (strcmp(defel->defname, "force_not_null") == 0)
1007                 {
1008                         if (cstate->force_notnull)
1009                                 ereport(ERROR,
1010                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1011                                                  errmsg("conflicting or redundant options")));
1012                         if (defel->arg && IsA(defel->arg, List))
1013                                 cstate->force_notnull = (List *) defel->arg;
1014                         else
1015                                 ereport(ERROR,
1016                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1017                                                  errmsg("argument to option \"%s\" must be a list of column names",
1018                                                                 defel->defname)));
1019                 }
1020                 else if (strcmp(defel->defname, "convert_selectively") == 0)
1021                 {
1022                         /*
1023                          * Undocumented, not-accessible-from-SQL option: convert only
1024                          * the named columns to binary form, storing the rest as NULLs.
1025                          * It's allowed for the column list to be NIL.
1026                          */
1027                         if (cstate->convert_selectively)
1028                                 ereport(ERROR,
1029                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1030                                                  errmsg("conflicting or redundant options")));
1031                         cstate->convert_selectively = true;
1032                         if (defel->arg == NULL || IsA(defel->arg, List))
1033                                 cstate->convert_select = (List *) defel->arg;
1034                         else
1035                                 ereport(ERROR,
1036                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1037                                                  errmsg("argument to option \"%s\" must be a list of column names",
1038                                                                 defel->defname)));
1039                 }
1040                 else if (strcmp(defel->defname, "encoding") == 0)
1041                 {
1042                         if (cstate->file_encoding >= 0)
1043                                 ereport(ERROR,
1044                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1045                                                  errmsg("conflicting or redundant options")));
1046                         cstate->file_encoding = pg_char_to_encoding(defGetString(defel));
1047                         if (cstate->file_encoding < 0)
1048                                 ereport(ERROR,
1049                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1050                                                  errmsg("argument to option \"%s\" must be a valid encoding name",
1051                                                                 defel->defname)));
1052                 }
1053                 else
1054                         ereport(ERROR,
1055                                         (errcode(ERRCODE_SYNTAX_ERROR),
1056                                          errmsg("option \"%s\" not recognized",
1057                                                         defel->defname)));
1058         }
1059
1060         /*
1061          * Check for incompatible options (must do these two before inserting
1062          * defaults)
1063          */
1064         if (cstate->binary && cstate->delim)
1065                 ereport(ERROR,
1066                                 (errcode(ERRCODE_SYNTAX_ERROR),
1067                                  errmsg("cannot specify DELIMITER in BINARY mode")));
1068
1069         if (cstate->binary && cstate->null_print)
1070                 ereport(ERROR,
1071                                 (errcode(ERRCODE_SYNTAX_ERROR),
1072                                  errmsg("cannot specify NULL in BINARY mode")));
1073
1074         /* Set defaults for omitted options */
1075         if (!cstate->delim)
1076                 cstate->delim = cstate->csv_mode ? "," : "\t";
1077
1078         if (!cstate->null_print)
1079                 cstate->null_print = cstate->csv_mode ? "" : "\\N";
1080         cstate->null_print_len = strlen(cstate->null_print);
1081
1082         if (cstate->csv_mode)
1083         {
1084                 if (!cstate->quote)
1085                         cstate->quote = "\"";
1086                 if (!cstate->escape)
1087                         cstate->escape = cstate->quote;
1088         }
1089
1090         /* Only single-byte delimiter strings are supported. */
1091         if (strlen(cstate->delim) != 1)
1092                 ereport(ERROR,
1093                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1094                           errmsg("COPY delimiter must be a single one-byte character")));
1095
1096         /* Disallow end-of-line characters */
1097         if (strchr(cstate->delim, '\r') != NULL ||
1098                 strchr(cstate->delim, '\n') != NULL)
1099                 ereport(ERROR,
1100                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1101                          errmsg("COPY delimiter cannot be newline or carriage return")));
1102
1103         if (strchr(cstate->null_print, '\r') != NULL ||
1104                 strchr(cstate->null_print, '\n') != NULL)
1105                 ereport(ERROR,
1106                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1107                                  errmsg("COPY null representation cannot use newline or carriage return")));
1108
1109         /*
1110          * Disallow unsafe delimiter characters in non-CSV mode.  We can't allow
1111          * backslash because it would be ambiguous.  We can't allow the other
1112          * cases because data characters matching the delimiter must be
1113          * backslashed, and certain backslash combinations are interpreted
1114          * non-literally by COPY IN.  Disallowing all lower case ASCII letters is
1115          * more than strictly necessary, but seems best for consistency and
1116          * future-proofing.  Likewise we disallow all digits though only octal
1117          * digits are actually dangerous.
1118          */
1119         if (!cstate->csv_mode &&
1120                 strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
1121                            cstate->delim[0]) != NULL)
1122                 ereport(ERROR,
1123                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1124                                  errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
1125
1126         /* Check header */
1127         if (!cstate->csv_mode && cstate->header_line)
1128                 ereport(ERROR,
1129                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1130                                  errmsg("COPY HEADER available only in CSV mode")));
1131
1132         /* Check quote */
1133         if (!cstate->csv_mode && cstate->quote != NULL)
1134                 ereport(ERROR,
1135                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1136                                  errmsg("COPY quote available only in CSV mode")));
1137
1138         if (cstate->csv_mode && strlen(cstate->quote) != 1)
1139                 ereport(ERROR,
1140                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1141                                  errmsg("COPY quote must be a single one-byte character")));
1142
1143         if (cstate->csv_mode && cstate->delim[0] == cstate->quote[0])
1144                 ereport(ERROR,
1145                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1146                                  errmsg("COPY delimiter and quote must be different")));
1147
1148         /* Check escape */
1149         if (!cstate->csv_mode && cstate->escape != NULL)
1150                 ereport(ERROR,
1151                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1152                                  errmsg("COPY escape available only in CSV mode")));
1153
1154         if (cstate->csv_mode && strlen(cstate->escape) != 1)
1155                 ereport(ERROR,
1156                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1157                                  errmsg("COPY escape must be a single one-byte character")));
1158
1159         /* Check force_quote */
1160         if (!cstate->csv_mode && (cstate->force_quote || cstate->force_quote_all))
1161                 ereport(ERROR,
1162                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1163                                  errmsg("COPY force quote available only in CSV mode")));
1164         if ((cstate->force_quote || cstate->force_quote_all) && is_from)
1165                 ereport(ERROR,
1166                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1167                                  errmsg("COPY force quote only available using COPY TO")));
1168
1169         /* Check force_notnull */
1170         if (!cstate->csv_mode && cstate->force_notnull != NIL)
1171                 ereport(ERROR,
1172                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1173                                  errmsg("COPY force not null available only in CSV mode")));
1174         if (cstate->force_notnull != NIL && !is_from)
1175                 ereport(ERROR,
1176                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1177                           errmsg("COPY force not null only available using COPY FROM")));
1178
1179         /* Don't allow the delimiter to appear in the null string. */
1180         if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
1181                 ereport(ERROR,
1182                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1183                 errmsg("COPY delimiter must not appear in the NULL specification")));
1184
1185         /* Don't allow the CSV quote char to appear in the null string. */
1186         if (cstate->csv_mode &&
1187                 strchr(cstate->null_print, cstate->quote[0]) != NULL)
1188                 ereport(ERROR,
1189                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1190                                  errmsg("CSV quote character must not appear in the NULL specification")));
1191 }
1192
1193 /*
1194  * Common setup routines used by BeginCopyFrom and BeginCopyTo.
1195  *
1196  * Iff <binary>, unload or reload in the binary format, as opposed to the
1197  * more wasteful but more robust and portable text format.
1198  *
1199  * Iff <oids>, unload or reload the format that includes OID information.
1200  * On input, we accept OIDs whether or not the table has an OID column,
1201  * but silently drop them if it does not.  On output, we report an error
1202  * if the user asks for OIDs in a table that has none (not providing an
1203  * OID column might seem friendlier, but could seriously confuse programs).
1204  *
1205  * If in the text format, delimit columns with delimiter <delim> and print
1206  * NULL values as <null_print>.
1207  */
1208 static CopyState
1209 BeginCopy(bool is_from,
1210                   Relation rel,
1211                   Node *raw_query,
1212                   const char *queryString,
1213                   List *attnamelist,
1214                   List *options)
1215 {
1216         CopyState       cstate;
1217         TupleDesc       tupDesc;
1218         int                     num_phys_attrs;
1219         MemoryContext oldcontext;
1220
1221         /* Allocate workspace and zero all fields */
1222         cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
1223
1224         /*
1225          * We allocate everything used by a cstate in a new memory context. This
1226          * avoids memory leaks during repeated use of COPY in a query.
1227          */
1228         cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
1229                                                                                                 "COPY",
1230                                                                                                 ALLOCSET_DEFAULT_MINSIZE,
1231                                                                                                 ALLOCSET_DEFAULT_INITSIZE,
1232                                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
1233
1234         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1235
1236         /* Extract options from the statement node tree */
1237         ProcessCopyOptions(cstate, is_from, options);
1238
1239         /* Process the source/target relation or query */
1240         if (rel)
1241         {
1242                 Assert(!raw_query);
1243
1244                 cstate->rel = rel;
1245
1246                 tupDesc = RelationGetDescr(cstate->rel);
1247
1248                 /* Don't allow COPY w/ OIDs to or from a table without them */
1249                 if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
1250                         ereport(ERROR,
1251                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
1252                                          errmsg("table \"%s\" does not have OIDs",
1253                                                         RelationGetRelationName(cstate->rel))));
1254         }
1255         else
1256         {
1257                 List       *rewritten;
1258                 Query      *query;
1259                 PlannedStmt *plan;
1260                 DestReceiver *dest;
1261
1262                 Assert(!is_from);
1263                 cstate->rel = NULL;
1264
1265                 /* Don't allow COPY w/ OIDs from a select */
1266                 if (cstate->oids)
1267                         ereport(ERROR,
1268                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1269                                          errmsg("COPY (SELECT) WITH OIDS is not supported")));
1270
1271                 /*
1272                  * Run parse analysis and rewrite.      Note this also acquires sufficient
1273                  * locks on the source table(s).
1274                  *
1275                  * Because the parser and planner tend to scribble on their input, we
1276                  * make a preliminary copy of the source querytree.  This prevents
1277                  * problems in the case that the COPY is in a portal or plpgsql
1278                  * function and is executed repeatedly.  (See also the same hack in
1279                  * DECLARE CURSOR and PREPARE.)  XXX FIXME someday.
1280                  */
1281                 rewritten = pg_analyze_and_rewrite((Node *) copyObject(raw_query),
1282                                                                                    queryString, NULL, 0);
1283
1284                 /* We don't expect more or less than one result query */
1285                 if (list_length(rewritten) != 1)
1286                         elog(ERROR, "unexpected rewrite result");
1287
1288                 query = (Query *) linitial(rewritten);
1289
1290                 /* The grammar allows SELECT INTO, but we don't support that */
1291                 if (query->utilityStmt != NULL &&
1292                         IsA(query->utilityStmt, CreateTableAsStmt))
1293                         ereport(ERROR,
1294                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1295                                          errmsg("COPY (SELECT INTO) is not supported")));
1296
1297                 Assert(query->commandType == CMD_SELECT);
1298                 Assert(query->utilityStmt == NULL);
1299
1300                 /* plan the query */
1301                 plan = planner(query, 0, NULL);
1302
1303                 /*
1304                  * Use a snapshot with an updated command ID to ensure this query sees
1305                  * results of any previously executed queries.
1306                  */
1307                 PushCopiedSnapshot(GetActiveSnapshot());
1308                 UpdateActiveSnapshotCommandId();
1309
1310                 /* Create dest receiver for COPY OUT */
1311                 dest = CreateDestReceiver(DestCopyOut);
1312                 ((DR_copy *) dest)->cstate = cstate;
1313
1314                 /* Create a QueryDesc requesting no output */
1315                 cstate->queryDesc = CreateQueryDesc(plan, queryString,
1316                                                                                         GetActiveSnapshot(),
1317                                                                                         InvalidSnapshot,
1318                                                                                         dest, NULL, 0);
1319
1320                 /*
1321                  * Call ExecutorStart to prepare the plan for execution.
1322                  *
1323                  * ExecutorStart computes a result tupdesc for us
1324                  */
1325                 ExecutorStart(cstate->queryDesc, 0);
1326
1327                 tupDesc = cstate->queryDesc->tupDesc;
1328         }
1329
1330         /* Generate or convert list of attributes to process */
1331         cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1332
1333         num_phys_attrs = tupDesc->natts;
1334
1335         /* Convert FORCE QUOTE name list to per-column flags, check validity */
1336         cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1337         if (cstate->force_quote_all)
1338         {
1339                 int                     i;
1340
1341                 for (i = 0; i < num_phys_attrs; i++)
1342                         cstate->force_quote_flags[i] = true;
1343         }
1344         else if (cstate->force_quote)
1345         {
1346                 List       *attnums;
1347                 ListCell   *cur;
1348
1349                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_quote);
1350
1351                 foreach(cur, attnums)
1352                 {
1353                         int                     attnum = lfirst_int(cur);
1354
1355                         if (!list_member_int(cstate->attnumlist, attnum))
1356                                 ereport(ERROR,
1357                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1358                                    errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
1359                                                   NameStr(tupDesc->attrs[attnum - 1]->attname))));
1360                         cstate->force_quote_flags[attnum - 1] = true;
1361                 }
1362         }
1363
1364         /* Convert FORCE NOT NULL name list to per-column flags, check validity */
1365         cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1366         if (cstate->force_notnull)
1367         {
1368                 List       *attnums;
1369                 ListCell   *cur;
1370
1371                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_notnull);
1372
1373                 foreach(cur, attnums)
1374                 {
1375                         int                     attnum = lfirst_int(cur);
1376
1377                         if (!list_member_int(cstate->attnumlist, attnum))
1378                                 ereport(ERROR,
1379                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1380                                 errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
1381                                            NameStr(tupDesc->attrs[attnum - 1]->attname))));
1382                         cstate->force_notnull_flags[attnum - 1] = true;
1383                 }
1384         }
1385
1386         /* Convert convert_selectively name list to per-column flags */
1387         if (cstate->convert_selectively)
1388         {
1389                 List       *attnums;
1390                 ListCell   *cur;
1391
1392                 cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1393
1394                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->convert_select);
1395
1396                 foreach(cur, attnums)
1397                 {
1398                         int                     attnum = lfirst_int(cur);
1399
1400                         if (!list_member_int(cstate->attnumlist, attnum))
1401                                 ereport(ERROR,
1402                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1403                                                  errmsg_internal("selected column \"%s\" not referenced by COPY",
1404                                                                                  NameStr(tupDesc->attrs[attnum - 1]->attname))));
1405                         cstate->convert_select_flags[attnum - 1] = true;
1406                 }
1407         }
1408
1409         /* Use client encoding when ENCODING option is not specified. */
1410         if (cstate->file_encoding < 0)
1411                 cstate->file_encoding = pg_get_client_encoding();
1412
1413         /*
1414          * Set up encoding conversion info.  Even if the file and server encodings
1415          * are the same, we must apply pg_any_to_server() to validate data in
1416          * multibyte encodings.
1417          */
1418         cstate->need_transcoding =
1419                 (cstate->file_encoding != GetDatabaseEncoding() ||
1420                  pg_database_encoding_max_length() > 1);
1421         /* See Multibyte encoding comment above */
1422         cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
1423
1424         cstate->copy_dest = COPY_FILE;          /* default */
1425
1426         MemoryContextSwitchTo(oldcontext);
1427
1428         return cstate;
1429 }
1430
1431 /*
1432  * Closes the pipe to an external program, checking the pclose() return code.
1433  */
1434 static void
1435 ClosePipeToProgram(CopyState cstate)
1436 {
1437         int pclose_rc;
1438
1439         Assert(cstate->is_program);
1440
1441         pclose_rc = ClosePipeStream(cstate->copy_file);
1442         if (pclose_rc == -1)
1443                 ereport(ERROR,
1444                                 (errmsg("could not close pipe to external command: %m")));
1445         else if (pclose_rc != 0)
1446                 ereport(ERROR,
1447                                 (errmsg("program \"%s\" failed",
1448                                                 cstate->filename),
1449                                  errdetail_internal("%s", wait_result_to_str(pclose_rc))));
1450 }
1451
1452 /*
1453  * Release resources allocated in a cstate for COPY TO/FROM.
1454  */
1455 static void
1456 EndCopy(CopyState cstate)
1457 {
1458         if (cstate->is_program)
1459         {
1460                 ClosePipeToProgram(cstate);
1461         }
1462         else
1463         {
1464                 if (cstate->filename != NULL && FreeFile(cstate->copy_file))
1465                         ereport(ERROR,
1466                                         (errcode_for_file_access(),
1467                                          errmsg("could not close file \"%s\": %m",
1468                                                         cstate->filename)));
1469         }
1470
1471         MemoryContextDelete(cstate->copycontext);
1472         pfree(cstate);
1473 }
1474
1475 /*
1476  * Setup CopyState to read tuples from a table or a query for COPY TO.
1477  */
1478 static CopyState
1479 BeginCopyTo(Relation rel,
1480                         Node *query,
1481                         const char *queryString,
1482                         const char *filename,
1483                         bool  is_program,
1484                         List *attnamelist,
1485                         List *options)
1486 {
1487         CopyState       cstate;
1488         bool            pipe = (filename == NULL);
1489         MemoryContext oldcontext;
1490
1491         if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
1492         {
1493                 if (rel->rd_rel->relkind == RELKIND_VIEW)
1494                         ereport(ERROR,
1495                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1496                                          errmsg("cannot copy from view \"%s\"",
1497                                                         RelationGetRelationName(rel)),
1498                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1499                 else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
1500                         ereport(ERROR,
1501                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1502                                          errmsg("cannot copy from materialized view \"%s\"",
1503                                                         RelationGetRelationName(rel)),
1504                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1505                 else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1506                         ereport(ERROR,
1507                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1508                                          errmsg("cannot copy from foreign table \"%s\"",
1509                                                         RelationGetRelationName(rel)),
1510                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1511                 else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
1512                         ereport(ERROR,
1513                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1514                                          errmsg("cannot copy from sequence \"%s\"",
1515                                                         RelationGetRelationName(rel))));
1516                 else
1517                         ereport(ERROR,
1518                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1519                                          errmsg("cannot copy from non-table relation \"%s\"",
1520                                                         RelationGetRelationName(rel))));
1521         }
1522
1523         cstate = BeginCopy(false, rel, query, queryString, attnamelist, options);
1524         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1525
1526         if (pipe)
1527         {
1528                 Assert(!is_program);    /* the grammar does not allow this */
1529                 if (whereToSendOutput != DestRemote)
1530                         cstate->copy_file = stdout;
1531         }
1532         else
1533         {
1534                 cstate->filename = pstrdup(filename);
1535                 cstate->is_program = is_program;
1536
1537                 if (is_program)
1538                 {
1539                         cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
1540                         if (cstate->copy_file == NULL)
1541                                 ereport(ERROR,
1542                                                 (errmsg("could not execute command \"%s\": %m",
1543                                                                 cstate->filename)));
1544                 }
1545                 else
1546                 {
1547                         mode_t          oumask;         /* Pre-existing umask value */
1548                         struct stat st;
1549
1550                         /*
1551                          * Prevent write to relative path ... too easy to shoot oneself in
1552                          * the foot by overwriting a database file ...
1553                          */
1554                         if (!is_absolute_path(filename))
1555                                 ereport(ERROR,
1556                                                 (errcode(ERRCODE_INVALID_NAME),
1557                                                  errmsg("relative path not allowed for COPY to file")));
1558
1559                         oumask = umask(S_IWGRP | S_IWOTH);
1560                         cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1561                         umask(oumask);
1562                         if (cstate->copy_file == NULL)
1563                                 ereport(ERROR,
1564                                                 (errcode_for_file_access(),
1565                                                  errmsg("could not open file \"%s\" for writing: %m",
1566                                                                 cstate->filename)));
1567
1568                         fstat(fileno(cstate->copy_file), &st);
1569                         if (S_ISDIR(st.st_mode))
1570                                 ereport(ERROR,
1571                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1572                                                  errmsg("\"%s\" is a directory", cstate->filename)));
1573                 }
1574         }
1575
1576         MemoryContextSwitchTo(oldcontext);
1577
1578         return cstate;
1579 }
1580
1581 /*
1582  * This intermediate routine exists mainly to localize the effects of setjmp
1583  * so we don't need to plaster a lot of variables with "volatile".
1584  */
1585 static uint64
1586 DoCopyTo(CopyState cstate)
1587 {
1588         bool            pipe = (cstate->filename == NULL);
1589         bool            fe_copy = (pipe && whereToSendOutput == DestRemote);
1590         uint64          processed;
1591
1592         PG_TRY();
1593         {
1594                 if (fe_copy)
1595                         SendCopyBegin(cstate);
1596
1597                 processed = CopyTo(cstate);
1598
1599                 if (fe_copy)
1600                         SendCopyEnd(cstate);
1601         }
1602         PG_CATCH();
1603         {
1604                 /*
1605                  * Make sure we turn off old-style COPY OUT mode upon error. It is
1606                  * okay to do this in all cases, since it does nothing if the mode is
1607                  * not on.
1608                  */
1609                 pq_endcopyout(true);
1610                 PG_RE_THROW();
1611         }
1612         PG_END_TRY();
1613
1614         return processed;
1615 }
1616
1617 /*
1618  * Clean up storage and release resources for COPY TO.
1619  */
1620 static void
1621 EndCopyTo(CopyState cstate)
1622 {
1623         if (cstate->queryDesc != NULL)
1624         {
1625                 /* Close down the query and free resources. */
1626                 ExecutorFinish(cstate->queryDesc);
1627                 ExecutorEnd(cstate->queryDesc);
1628                 FreeQueryDesc(cstate->queryDesc);
1629                 PopActiveSnapshot();
1630         }
1631
1632         /* Clean up storage */
1633         EndCopy(cstate);
1634 }
1635
1636 /*
1637  * Copy from relation or query TO file.
1638  */
1639 static uint64
1640 CopyTo(CopyState cstate)
1641 {
1642         TupleDesc       tupDesc;
1643         int                     num_phys_attrs;
1644         Form_pg_attribute *attr;
1645         ListCell   *cur;
1646         uint64          processed;
1647
1648         if (cstate->rel)
1649                 tupDesc = RelationGetDescr(cstate->rel);
1650         else
1651                 tupDesc = cstate->queryDesc->tupDesc;
1652         attr = tupDesc->attrs;
1653         num_phys_attrs = tupDesc->natts;
1654         cstate->null_print_client = cstate->null_print;         /* default */
1655
1656         /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1657         cstate->fe_msgbuf = makeStringInfo();
1658
1659         /* Get info about the columns we need to process. */
1660         cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1661         foreach(cur, cstate->attnumlist)
1662         {
1663                 int                     attnum = lfirst_int(cur);
1664                 Oid                     out_func_oid;
1665                 bool            isvarlena;
1666
1667                 if (cstate->binary)
1668                         getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
1669                                                                         &out_func_oid,
1670                                                                         &isvarlena);
1671                 else
1672                         getTypeOutputInfo(attr[attnum - 1]->atttypid,
1673                                                           &out_func_oid,
1674                                                           &isvarlena);
1675                 fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
1676         }
1677
1678         /*
1679          * Create a temporary memory context that we can reset once per row to
1680          * recover palloc'd memory.  This avoids any problems with leaks inside
1681          * datatype output routines, and should be faster than retail pfree's
1682          * anyway.      (We don't need a whole econtext as CopyFrom does.)
1683          */
1684         cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1685                                                                                            "COPY TO",
1686                                                                                            ALLOCSET_DEFAULT_MINSIZE,
1687                                                                                            ALLOCSET_DEFAULT_INITSIZE,
1688                                                                                            ALLOCSET_DEFAULT_MAXSIZE);
1689
1690         if (cstate->binary)
1691         {
1692                 /* Generate header for a binary copy */
1693                 int32           tmp;
1694
1695                 /* Signature */
1696                 CopySendData(cstate, BinarySignature, 11);
1697                 /* Flags field */
1698                 tmp = 0;
1699                 if (cstate->oids)
1700                         tmp |= (1 << 16);
1701                 CopySendInt32(cstate, tmp);
1702                 /* No header extension */
1703                 tmp = 0;
1704                 CopySendInt32(cstate, tmp);
1705         }
1706         else
1707         {
1708                 /*
1709                  * For non-binary copy, we need to convert null_print to file
1710                  * encoding, because it will be sent directly with CopySendString.
1711                  */
1712                 if (cstate->need_transcoding)
1713                         cstate->null_print_client = pg_server_to_any(cstate->null_print,
1714                                                                                                           cstate->null_print_len,
1715                                                                                                           cstate->file_encoding);
1716
1717                 /* if a header has been requested send the line */
1718                 if (cstate->header_line)
1719                 {
1720                         bool            hdr_delim = false;
1721
1722                         foreach(cur, cstate->attnumlist)
1723                         {
1724                                 int                     attnum = lfirst_int(cur);
1725                                 char       *colname;
1726
1727                                 if (hdr_delim)
1728                                         CopySendChar(cstate, cstate->delim[0]);
1729                                 hdr_delim = true;
1730
1731                                 colname = NameStr(attr[attnum - 1]->attname);
1732
1733                                 CopyAttributeOutCSV(cstate, colname, false,
1734                                                                         list_length(cstate->attnumlist) == 1);
1735                         }
1736
1737                         CopySendEndOfRow(cstate);
1738                 }
1739         }
1740
1741         if (cstate->rel)
1742         {
1743                 Datum      *values;
1744                 bool       *nulls;
1745                 HeapScanDesc scandesc;
1746                 HeapTuple       tuple;
1747
1748                 values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1749                 nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
1750
1751                 scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1752
1753                 processed = 0;
1754                 while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
1755                 {
1756                         CHECK_FOR_INTERRUPTS();
1757
1758                         /* Deconstruct the tuple ... faster than repeated heap_getattr */
1759                         heap_deform_tuple(tuple, tupDesc, values, nulls);
1760
1761                         /* Format and send the data */
1762                         CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
1763                         processed++;
1764                 }
1765
1766                 heap_endscan(scandesc);
1767
1768                 pfree(values);
1769                 pfree(nulls);
1770         }
1771         else
1772         {
1773                 /* run the plan --- the dest receiver will send tuples */
1774                 ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
1775                 processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1776         }
1777
1778         if (cstate->binary)
1779         {
1780                 /* Generate trailer for a binary copy */
1781                 CopySendInt16(cstate, -1);
1782                 /* Need to flush out the trailer */
1783                 CopySendEndOfRow(cstate);
1784         }
1785
1786         MemoryContextDelete(cstate->rowcontext);
1787
1788         return processed;
1789 }
1790
1791 /*
1792  * Emit one row during CopyTo().
1793  */
1794 static void
1795 CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
1796 {
1797         bool            need_delim = false;
1798         FmgrInfo   *out_functions = cstate->out_functions;
1799         MemoryContext oldcontext;
1800         ListCell   *cur;
1801         char       *string;
1802
1803         MemoryContextReset(cstate->rowcontext);
1804         oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1805
1806         if (cstate->binary)
1807         {
1808                 /* Binary per-tuple header */
1809                 CopySendInt16(cstate, list_length(cstate->attnumlist));
1810                 /* Send OID if wanted --- note attnumlist doesn't include it */
1811                 if (cstate->oids)
1812                 {
1813                         /* Hack --- assume Oid is same size as int32 */
1814                         CopySendInt32(cstate, sizeof(int32));
1815                         CopySendInt32(cstate, tupleOid);
1816                 }
1817         }
1818         else
1819         {
1820                 /* Text format has no per-tuple header, but send OID if wanted */
1821                 /* Assume digits don't need any quoting or encoding conversion */
1822                 if (cstate->oids)
1823                 {
1824                         string = DatumGetCString(DirectFunctionCall1(oidout,
1825                                                                                                 ObjectIdGetDatum(tupleOid)));
1826                         CopySendString(cstate, string);
1827                         need_delim = true;
1828                 }
1829         }
1830
1831         foreach(cur, cstate->attnumlist)
1832         {
1833                 int                     attnum = lfirst_int(cur);
1834                 Datum           value = values[attnum - 1];
1835                 bool            isnull = nulls[attnum - 1];
1836
1837                 if (!cstate->binary)
1838                 {
1839                         if (need_delim)
1840                                 CopySendChar(cstate, cstate->delim[0]);
1841                         need_delim = true;
1842                 }
1843
1844                 if (isnull)
1845                 {
1846                         if (!cstate->binary)
1847                                 CopySendString(cstate, cstate->null_print_client);
1848                         else
1849                                 CopySendInt32(cstate, -1);
1850                 }
1851                 else
1852                 {
1853                         if (!cstate->binary)
1854                         {
1855                                 string = OutputFunctionCall(&out_functions[attnum - 1],
1856                                                                                         value);
1857                                 if (cstate->csv_mode)
1858                                         CopyAttributeOutCSV(cstate, string,
1859                                                                                 cstate->force_quote_flags[attnum - 1],
1860                                                                                 list_length(cstate->attnumlist) == 1);
1861                                 else
1862                                         CopyAttributeOutText(cstate, string);
1863                         }
1864                         else
1865                         {
1866                                 bytea      *outputbytes;
1867
1868                                 outputbytes = SendFunctionCall(&out_functions[attnum - 1],
1869                                                                                            value);
1870                                 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
1871                                 CopySendData(cstate, VARDATA(outputbytes),
1872                                                          VARSIZE(outputbytes) - VARHDRSZ);
1873                         }
1874                 }
1875         }
1876
1877         CopySendEndOfRow(cstate);
1878
1879         MemoryContextSwitchTo(oldcontext);
1880 }
1881
1882
1883 /*
1884  * error context callback for COPY FROM
1885  *
1886  * The argument for the error context must be CopyState.
1887  */
1888 void
1889 CopyFromErrorCallback(void *arg)
1890 {
1891         CopyState       cstate = (CopyState) arg;
1892
1893         if (cstate->binary)
1894         {
1895                 /* can't usefully display the data */
1896                 if (cstate->cur_attname)
1897                         errcontext("COPY %s, line %d, column %s",
1898                                            cstate->cur_relname, cstate->cur_lineno,
1899                                            cstate->cur_attname);
1900                 else
1901                         errcontext("COPY %s, line %d",
1902                                            cstate->cur_relname, cstate->cur_lineno);
1903         }
1904         else
1905         {
1906                 if (cstate->cur_attname && cstate->cur_attval)
1907                 {
1908                         /* error is relevant to a particular column */
1909                         char       *attval;
1910
1911                         attval = limit_printout_length(cstate->cur_attval);
1912                         errcontext("COPY %s, line %d, column %s: \"%s\"",
1913                                            cstate->cur_relname, cstate->cur_lineno,
1914                                            cstate->cur_attname, attval);
1915                         pfree(attval);
1916                 }
1917                 else if (cstate->cur_attname)
1918                 {
1919                         /* error is relevant to a particular column, value is NULL */
1920                         errcontext("COPY %s, line %d, column %s: null input",
1921                                            cstate->cur_relname, cstate->cur_lineno,
1922                                            cstate->cur_attname);
1923                 }
1924                 else
1925                 {
1926                         /* error is relevant to a particular line */
1927                         if (cstate->line_buf_converted || !cstate->need_transcoding)
1928                         {
1929                                 char       *lineval;
1930
1931                                 lineval = limit_printout_length(cstate->line_buf.data);
1932                                 errcontext("COPY %s, line %d: \"%s\"",
1933                                                    cstate->cur_relname, cstate->cur_lineno, lineval);
1934                                 pfree(lineval);
1935                         }
1936                         else
1937                         {
1938                                 /*
1939                                  * Here, the line buffer is still in a foreign encoding, and
1940                                  * indeed it's quite likely that the error is precisely a
1941                                  * failure to do encoding conversion (ie, bad data).  We dare
1942                                  * not try to convert it, and at present there's no way to
1943                                  * regurgitate it without conversion.  So we have to punt and
1944                                  * just report the line number.
1945                                  */
1946                                 errcontext("COPY %s, line %d",
1947                                                    cstate->cur_relname, cstate->cur_lineno);
1948                         }
1949                 }
1950         }
1951 }
1952
/*
 * Make sure we don't print an unreasonable amount of COPY data in a message.
 *
 * Using the sprintf "precision" limit to truncate would look simpler, but
 * some versions of glibc have a bug/misfeature whereby vsnprintf always
 * fails (returns -1) when asked to truncate a string containing byte
 * sequences that are invalid in the current encoding.  So the truncation is
 * done by hand here.
 *
 * Returns a freshly palloc'd string: a plain copy when str is at most
 * MAX_COPY_DATA_DISPLAY bytes long, otherwise a prefix clipped by
 * pg_mbcliplen() with "..." appended.
 */
static char *
limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

	int			total = strlen(str);

	if (total > MAX_COPY_DATA_DISPLAY)
	{
		/* Let the encoding-aware clipper choose the cut point */
		int			keep = pg_mbcliplen(str, total, MAX_COPY_DATA_DISPLAY);
		char	   *clipped = (char *) palloc(keep + 4);

		/* Prefix, then "..." plus its terminating NUL (4 bytes) */
		memcpy(clipped, str, keep);
		memcpy(clipped + keep, "...", 4);

		return clipped;
	}

	/* Short enough already: hand back an unmodified copy */
	return pstrdup(str);
}
1987
1988 /*
1989  * Copy FROM file to relation.
1990  */
1991 static uint64
1992 CopyFrom(CopyState cstate)
1993 {
1994         HeapTuple       tuple;
1995         TupleDesc       tupDesc;
1996         Datum      *values;
1997         bool       *nulls;
1998         ResultRelInfo *resultRelInfo;
1999         EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
2000         ExprContext *econtext;
2001         TupleTableSlot *myslot;
2002         MemoryContext oldcontext = CurrentMemoryContext;
2003
2004         ErrorContextCallback errcallback;
2005         CommandId       mycid = GetCurrentCommandId(true);
2006         int                     hi_options = 0; /* start with default heap_insert options */
2007         BulkInsertState bistate;
2008         uint64          processed = 0;
2009         bool            useHeapMultiInsert;
2010         int                     nBufferedTuples = 0;
2011
2012 #define MAX_BUFFERED_TUPLES 1000
2013         HeapTuple  *bufferedTuples = NULL;      /* initialize to silence warning */
2014         Size            bufferedTuplesSize = 0;
2015
2016         Assert(cstate->rel);
2017
2018         if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
2019         {
2020                 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
2021                         ereport(ERROR,
2022                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2023                                          errmsg("cannot copy to view \"%s\"",
2024                                                         RelationGetRelationName(cstate->rel))));
2025                 else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
2026                         ereport(ERROR,
2027                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2028                                          errmsg("cannot copy to materialized view \"%s\"",
2029                                                         RelationGetRelationName(cstate->rel))));
2030                 else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
2031                         ereport(ERROR,
2032                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2033                                          errmsg("cannot copy to foreign table \"%s\"",
2034                                                         RelationGetRelationName(cstate->rel))));
2035                 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
2036                         ereport(ERROR,
2037                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2038                                          errmsg("cannot copy to sequence \"%s\"",
2039                                                         RelationGetRelationName(cstate->rel))));
2040                 else
2041                         ereport(ERROR,
2042                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2043                                          errmsg("cannot copy to non-table relation \"%s\"",
2044                                                         RelationGetRelationName(cstate->rel))));
2045         }
2046
2047         tupDesc = RelationGetDescr(cstate->rel);
2048
2049         /*----------
2050          * Check to see if we can avoid writing WAL
2051          *
2052          * If archive logging/streaming is not enabled *and* either
2053          *      - table was created in same transaction as this COPY
2054          *      - data is being written to relfilenode created in this transaction
2055          * then we can skip writing WAL.  It's safe because if the transaction
2056          * doesn't commit, we'll discard the table (or the new relfilenode file).
2057          * If it does commit, we'll have done the heap_sync at the bottom of this
2058          * routine first.
2059          *
2060          * As mentioned in comments in utils/rel.h, the in-same-transaction test
2061          * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
2062          * can be cleared before the end of the transaction. The exact case is
2063          * when a relation sets a new relfilenode twice in same transaction, yet
2064          * the second one fails in an aborted subtransaction, e.g.
2065          *
2066          * BEGIN;
2067          * TRUNCATE t;
2068          * SAVEPOINT save;
2069          * TRUNCATE t;
2070          * ROLLBACK TO save;
2071          * COPY ...
2072          *
2073          * Also, if the target file is new-in-transaction, we assume that checking
2074          * FSM for free space is a waste of time, even if we must use WAL because
2075          * of archiving.  This could possibly be wrong, but it's unlikely.
2076          *
2077          * The comments for heap_insert and RelationGetBufferForTuple specify that
2078          * skipping WAL logging is only safe if we ensure that our tuples do not
2079          * go into pages containing tuples from any other transactions --- but this
2080          * must be the case if we have a new table or new relfilenode, so we need
2081          * no additional work to enforce that.
2082          *----------
2083          */
2084         /* createSubid is creation check, newRelfilenodeSubid is truncation check */
2085         if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
2086                 cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2087         {
2088                 hi_options |= HEAP_INSERT_SKIP_FSM;
2089                 if (!XLogIsNeeded())
2090                         hi_options |= HEAP_INSERT_SKIP_WAL;
2091         }
2092
2093         /*
2094          * Optimize if new relfilenode was created in this subxact or
2095          * one of its committed children and we won't see those rows later
2096          * as part of an earlier scan or command. This ensures that if this
2097          * subtransaction aborts then the frozen rows won't be visible
2098          * after xact cleanup. Note that the stronger test of exactly
2099          * which subtransaction created it is crucial for correctness
2100          * of this optimisation.
2101          */
2102         if (cstate->freeze)
2103         {
2104                 if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
2105                         ereport(ERROR,
2106                                         (ERRCODE_INVALID_TRANSACTION_STATE,
2107                                         errmsg("cannot perform FREEZE because of prior transaction activity")));
2108
2109                 if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
2110                         cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
2111                         ereport(ERROR,
2112                                         (ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE,
2113                                          errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
2114
2115                 hi_options |= HEAP_INSERT_FROZEN;
2116         }
2117
2118         /*
2119          * We need a ResultRelInfo so we can use the regular executor's
2120          * index-entry-making machinery.  (There used to be a huge amount of code
2121          * here that basically duplicated execUtils.c ...)
2122          */
2123         resultRelInfo = makeNode(ResultRelInfo);
2124         resultRelInfo->ri_RangeTableIndex = 1;          /* dummy */
2125         resultRelInfo->ri_RelationDesc = cstate->rel;
2126         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(cstate->rel->trigdesc);
2127         if (resultRelInfo->ri_TrigDesc)
2128         {
2129                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
2130                         palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
2131                 resultRelInfo->ri_TrigWhenExprs = (List **)
2132                         palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(List *));
2133         }
2134         resultRelInfo->ri_TrigInstrument = NULL;
2135
2136         ExecOpenIndices(resultRelInfo);
2137
2138         estate->es_result_relations = resultRelInfo;
2139         estate->es_num_result_relations = 1;
2140         estate->es_result_relation_info = resultRelInfo;
2141
2142         /* Set up a tuple slot too */
2143         myslot = ExecInitExtraTupleSlot(estate);
2144         ExecSetSlotDescriptor(myslot, tupDesc);
2145         /* Triggers might need a slot as well */
2146         estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
2147
2148         /*
2149          * It's more efficient to prepare a bunch of tuples for insertion, and
2150          * insert them in one heap_multi_insert() call, than call heap_insert()
2151          * separately for every tuple. However, we can't do that if there are
2152          * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
2153          * expressions. Such triggers or expressions might query the table we're
2154          * inserting to, and act differently if the tuples that have already been
2155          * processed and prepared for insertion are not there.
2156          */
2157         if ((resultRelInfo->ri_TrigDesc != NULL &&
2158                  (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2159                   resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
2160                 cstate->volatile_defexprs)
2161         {
2162                 useHeapMultiInsert = false;
2163         }
2164         else
2165         {
2166                 useHeapMultiInsert = true;
2167                 bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
2168         }
2169
2170         /* Prepare to catch AFTER triggers. */
2171         AfterTriggerBeginQuery();
2172
2173         /*
2174          * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
2175          * should do this for COPY, since it's not really an "INSERT" statement as
2176          * such. However, executing these triggers maintains consistency with the
2177          * EACH ROW triggers that we already fire on COPY.
2178          */
2179         ExecBSInsertTriggers(estate, resultRelInfo);
2180
2181         values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
2182         nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
2183
2184         bistate = GetBulkInsertState();
2185         econtext = GetPerTupleExprContext(estate);
2186
2187         /* Set up callback to identify error line number */
2188         errcallback.callback = CopyFromErrorCallback;
2189         errcallback.arg = (void *) cstate;
2190         errcallback.previous = error_context_stack;
2191         error_context_stack = &errcallback;
2192
2193         for (;;)
2194         {
2195                 TupleTableSlot *slot;
2196                 bool            skip_tuple;
2197                 Oid                     loaded_oid = InvalidOid;
2198
2199                 CHECK_FOR_INTERRUPTS();
2200
2201                 if (nBufferedTuples == 0)
2202                 {
2203                         /*
2204                          * Reset the per-tuple exprcontext. We can only do this if the
2205                          * tuple buffer is empty (calling the context the per-tuple memory
2206                          * context is a bit of a misnomer now
2207                          */
2208                         ResetPerTupleExprContext(estate);
2209                 }
2210
2211                 /* Switch into its memory context */
2212                 MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2213
2214                 if (!NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid))
2215                         break;
2216
2217                 /* And now we can form the input tuple. */
2218                 tuple = heap_form_tuple(tupDesc, values, nulls);
2219
2220                 if (loaded_oid != InvalidOid)
2221                         HeapTupleSetOid(tuple, loaded_oid);
2222
2223                 /* Triggers and stuff need to be invoked in query context. */
2224                 MemoryContextSwitchTo(oldcontext);
2225
2226                 /* Place tuple in tuple slot --- but slot shouldn't free it */
2227                 slot = myslot;
2228                 ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2229
2230                 skip_tuple = false;
2231
2232                 /* BEFORE ROW INSERT Triggers */
2233                 if (resultRelInfo->ri_TrigDesc &&
2234                         resultRelInfo->ri_TrigDesc->trig_insert_before_row)
2235                 {
2236                         slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
2237
2238                         if (slot == NULL)       /* "do nothing" */
2239                                 skip_tuple = true;
2240                         else    /* trigger might have changed tuple */
2241                                 tuple = ExecMaterializeSlot(slot);
2242                 }
2243
2244                 if (!skip_tuple)
2245                 {
2246                         /* Check the constraints of the tuple */
2247                         if (cstate->rel->rd_att->constr)
2248                                 ExecConstraints(resultRelInfo, slot, estate);
2249
2250                         if (useHeapMultiInsert)
2251                         {
2252                                 /* Add this tuple to the tuple buffer */
2253                                 bufferedTuples[nBufferedTuples++] = tuple;
2254                                 bufferedTuplesSize += tuple->t_len;
2255
2256                                 /*
2257                                  * If the buffer filled up, flush it. Also flush if the total
2258                                  * size of all the tuples in the buffer becomes large, to
2259                                  * avoid using large amounts of memory for the buffers when
2260                                  * the tuples are exceptionally wide.
2261                                  */
2262                                 if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
2263                                         bufferedTuplesSize > 65535)
2264                                 {
2265                                         CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2266                                                                                 resultRelInfo, myslot, bistate,
2267                                                                                 nBufferedTuples, bufferedTuples);
2268                                         nBufferedTuples = 0;
2269                                         bufferedTuplesSize = 0;
2270                                 }
2271                         }
2272                         else
2273                         {
2274                                 List       *recheckIndexes = NIL;
2275
2276                                 /* OK, store the tuple and create index entries for it */
2277                                 heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
2278
2279                                 if (resultRelInfo->ri_NumIndices > 0)
2280                                         recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
2281                                                                                                                    estate);
2282
2283                                 /* AFTER ROW INSERT Triggers */
2284                                 ExecARInsertTriggers(estate, resultRelInfo, tuple,
2285                                                                          recheckIndexes);
2286
2287                                 list_free(recheckIndexes);
2288                         }
2289
2290                         /*
2291                          * We count only tuples not suppressed by a BEFORE INSERT trigger;
2292                          * this is the same definition used by execMain.c for counting
2293                          * tuples inserted by an INSERT command.
2294                          */
2295                         processed++;
2296                 }
2297         }
2298
2299         /* Flush any remaining buffered tuples */
2300         if (nBufferedTuples > 0)
2301                 CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2302                                                         resultRelInfo, myslot, bistate,
2303                                                         nBufferedTuples, bufferedTuples);
2304
2305         /* Done, clean up */
2306         error_context_stack = errcallback.previous;
2307
2308         FreeBulkInsertState(bistate);
2309
2310         MemoryContextSwitchTo(oldcontext);
2311
2312         /* Execute AFTER STATEMENT insertion triggers */
2313         ExecASInsertTriggers(estate, resultRelInfo);
2314
2315         /* Handle queued AFTER triggers */
2316         AfterTriggerEndQuery(estate);
2317
2318         pfree(values);
2319         pfree(nulls);
2320
2321         ExecResetTupleTable(estate->es_tupleTable, false);
2322
2323         ExecCloseIndices(resultRelInfo);
2324
2325         FreeExecutorState(estate);
2326
2327         /*
2328          * If we skipped writing WAL, then we need to sync the heap (but not
2329          * indexes since those use WAL anyway)
2330          */
2331         if (hi_options & HEAP_INSERT_SKIP_WAL)
2332                 heap_sync(cstate->rel);
2333
2334         return processed;
2335 }
2336
2337 /*
2338  * A subroutine of CopyFrom, to write the current batch of buffered heap
2339  * tuples to the heap. Also updates indexes and runs AFTER ROW INSERT
2340  * triggers.
2341  */
2342 static void
2343 CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
2344                                         int hi_options, ResultRelInfo *resultRelInfo,
2345                                         TupleTableSlot *myslot, BulkInsertState bistate,
2346                                         int nBufferedTuples, HeapTuple *bufferedTuples)
2347 {
2348         MemoryContext oldcontext;
2349         int                     i;
2350
2351         /*
2352          * heap_multi_insert leaks memory, so switch to short-lived memory context
2353          * before calling it.
2354          */
2355         oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2356         heap_multi_insert(cstate->rel,
2357                                           bufferedTuples,
2358                                           nBufferedTuples,
2359                                           mycid,
2360                                           hi_options,
2361                                           bistate);
2362         MemoryContextSwitchTo(oldcontext);
2363
2364         /*
2365          * If there are any indexes, update them for all the inserted tuples, and
2366          * run AFTER ROW INSERT triggers.
2367          */
2368         if (resultRelInfo->ri_NumIndices > 0)
2369         {
2370                 for (i = 0; i < nBufferedTuples; i++)
2371                 {
2372                         List       *recheckIndexes;
2373
2374                         ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
2375                         recheckIndexes =
2376                                 ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
2377                                                                           estate);
2378                         ExecARInsertTriggers(estate, resultRelInfo,
2379                                                                  bufferedTuples[i],
2380                                                                  recheckIndexes);
2381                         list_free(recheckIndexes);
2382                 }
2383         }
2384
2385         /*
2386          * There's no indexes, but see if we need to run AFTER ROW INSERT triggers
2387          * anyway.
2388          */
2389         else if (resultRelInfo->ri_TrigDesc != NULL &&
2390                          resultRelInfo->ri_TrigDesc->trig_insert_after_row)
2391         {
2392                 for (i = 0; i < nBufferedTuples; i++)
2393                         ExecARInsertTriggers(estate, resultRelInfo,
2394                                                                  bufferedTuples[i],
2395                                                                  NIL);
2396         }
2397 }
2398
2399 /*
2400  * Setup to read tuples from a file for COPY FROM.
2401  *
2402  * 'rel': Used as a template for the tuples
2403  * 'filename': Name of server-local file to read
2404  * 'attnamelist': List of char *, columns to include. NIL selects all cols.
2405  * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
2406  *
2407  * Returns a CopyState, to be passed to NextCopyFrom and related functions.
2408  */
2409 CopyState
2410 BeginCopyFrom(Relation rel,
2411                           const char *filename,
2412                           bool  is_program,
2413                           List *attnamelist,
2414                           List *options)
2415 {
2416         CopyState       cstate;
2417         bool            pipe = (filename == NULL);
2418         TupleDesc       tupDesc;
2419         Form_pg_attribute *attr;
2420         AttrNumber      num_phys_attrs,
2421                                 num_defaults;
2422         FmgrInfo   *in_functions;
2423         Oid                *typioparams;
2424         int                     attnum;
2425         Oid                     in_func_oid;
2426         int                *defmap;
2427         ExprState **defexprs;
2428         MemoryContext oldcontext;
2429         bool            volatile_defexprs;
2430
2431         cstate = BeginCopy(true, rel, NULL, NULL, attnamelist, options);
2432         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
2433
2434         /* Initialize state variables */
2435         cstate->fe_eof = false;
2436         cstate->eol_type = EOL_UNKNOWN;
2437         cstate->cur_relname = RelationGetRelationName(cstate->rel);
2438         cstate->cur_lineno = 0;
2439         cstate->cur_attname = NULL;
2440         cstate->cur_attval = NULL;
2441
2442         /* Set up variables to avoid per-attribute overhead. */
2443         initStringInfo(&cstate->attribute_buf);
2444         initStringInfo(&cstate->line_buf);
2445         cstate->line_buf_converted = false;
2446         cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
2447         cstate->raw_buf_index = cstate->raw_buf_len = 0;
2448
2449         tupDesc = RelationGetDescr(cstate->rel);
2450         attr = tupDesc->attrs;
2451         num_phys_attrs = tupDesc->natts;
2452         num_defaults = 0;
2453         volatile_defexprs = false;
2454
2455         /*
2456          * Pick up the required catalog information for each attribute in the
2457          * relation, including the input function, the element type (to pass to
2458          * the input function), and info about defaults and constraints. (Which
2459          * input function we use depends on text/binary format choice.)
2460          */
2461         in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
2462         typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
2463         defmap = (int *) palloc(num_phys_attrs * sizeof(int));
2464         defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
2465
2466         for (attnum = 1; attnum <= num_phys_attrs; attnum++)
2467         {
2468                 /* We don't need info for dropped attributes */
2469                 if (attr[attnum - 1]->attisdropped)
2470                         continue;
2471
2472                 /* Fetch the input function and typioparam info */
2473                 if (cstate->binary)
2474                         getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
2475                                                                    &in_func_oid, &typioparams[attnum - 1]);
2476                 else
2477                         getTypeInputInfo(attr[attnum - 1]->atttypid,
2478                                                          &in_func_oid, &typioparams[attnum - 1]);
2479                 fmgr_info(in_func_oid, &in_functions[attnum - 1]);
2480
2481                 /* Get default info if needed */
2482                 if (!list_member_int(cstate->attnumlist, attnum))
2483                 {
2484                         /* attribute is NOT to be copied from input */
2485                         /* use default value if one exists */
2486                         Node       *defexpr = build_column_default(cstate->rel, attnum);
2487
2488                         if (defexpr != NULL)
2489                         {
2490                                 /* Initialize expressions in copycontext. */
2491                                 defexprs[num_defaults] = ExecInitExpr(
2492                                                                  expression_planner((Expr *) defexpr), NULL);
2493                                 defmap[num_defaults] = attnum - 1;
2494                                 num_defaults++;
2495
2496                                 if (!volatile_defexprs)
2497                                         volatile_defexprs = contain_volatile_functions(defexpr);
2498                         }
2499                 }
2500         }
2501
2502         /* We keep those variables in cstate. */
2503         cstate->in_functions = in_functions;
2504         cstate->typioparams = typioparams;
2505         cstate->defmap = defmap;
2506         cstate->defexprs = defexprs;
2507         cstate->volatile_defexprs = volatile_defexprs;
2508         cstate->num_defaults = num_defaults;
2509         cstate->is_program = is_program;
2510
2511         if (pipe)
2512         {
2513                 Assert(!is_program);    /* the grammar does not allow this */
2514                 if (whereToSendOutput == DestRemote)
2515                         ReceiveCopyBegin(cstate);
2516                 else
2517                         cstate->copy_file = stdin;
2518         }
2519         else
2520         {
2521                 cstate->filename = pstrdup(filename);
2522
2523                 if (cstate->is_program)
2524                 {
2525                         cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
2526                         if (cstate->copy_file == NULL)
2527                                 ereport(ERROR,
2528                                                 (errmsg("could not execute command \"%s\": %m",
2529                                                                 cstate->filename)));
2530                 }
2531                 else
2532                 {
2533                         struct stat st;
2534
2535                         cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
2536                         if (cstate->copy_file == NULL)
2537                                 ereport(ERROR,
2538                                                 (errcode_for_file_access(),
2539                                                  errmsg("could not open file \"%s\" for reading: %m",
2540                                                                 cstate->filename)));
2541
2542                         fstat(fileno(cstate->copy_file), &st);
2543                         if (S_ISDIR(st.st_mode))
2544                                 ereport(ERROR,
2545                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2546                                                  errmsg("\"%s\" is a directory", cstate->filename)));
2547                 }
2548         }
2549
2550         if (!cstate->binary)
2551         {
2552                 /* must rely on user to tell us... */
2553                 cstate->file_has_oids = cstate->oids;
2554         }
2555         else
2556         {
2557                 /* Read and verify binary header */
2558                 char            readSig[11];
2559                 int32           tmp;
2560
2561                 /* Signature */
2562                 if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
2563                         memcmp(readSig, BinarySignature, 11) != 0)
2564                         ereport(ERROR,
2565                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2566                                          errmsg("COPY file signature not recognized")));
2567                 /* Flags field */
2568                 if (!CopyGetInt32(cstate, &tmp))
2569                         ereport(ERROR,
2570                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2571                                          errmsg("invalid COPY file header (missing flags)")));
2572                 cstate->file_has_oids = (tmp & (1 << 16)) != 0;
2573                 tmp &= ~(1 << 16);
2574                 if ((tmp >> 16) != 0)
2575                         ereport(ERROR,
2576                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2577                                  errmsg("unrecognized critical flags in COPY file header")));
2578                 /* Header extension length */
2579                 if (!CopyGetInt32(cstate, &tmp) ||
2580                         tmp < 0)
2581                         ereport(ERROR,
2582                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2583                                          errmsg("invalid COPY file header (missing length)")));
2584                 /* Skip extension header, if present */
2585                 while (tmp-- > 0)
2586                 {
2587                         if (CopyGetData(cstate, readSig, 1, 1) != 1)
2588                                 ereport(ERROR,
2589                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2590                                                  errmsg("invalid COPY file header (wrong length)")));
2591                 }
2592         }
2593
2594         if (cstate->file_has_oids && cstate->binary)
2595         {
2596                 getTypeBinaryInputInfo(OIDOID,
2597                                                            &in_func_oid, &cstate->oid_typioparam);
2598                 fmgr_info(in_func_oid, &cstate->oid_in_function);
2599         }
2600
2601         /* create workspace for CopyReadAttributes results */
2602         if (!cstate->binary)
2603         {
2604                 AttrNumber      attr_count = list_length(cstate->attnumlist);
2605                 int                     nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
2606
2607                 cstate->max_fields = nfields;
2608                 cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
2609         }
2610
2611         MemoryContextSwitchTo(oldcontext);
2612
2613         return cstate;
2614 }
2615
2616 /*
2617  * Read raw fields in the next line for COPY FROM in text or csv mode.
2618  * Return false if no more lines.
2619  *
2620  * An internal temporary buffer is returned via 'fields'. It is valid until
2621  * the next call of the function. Since the function returns all raw fields
2622  * in the input file, 'nfields' could be different from the number of columns
2623  * in the relation.
2624  *
2625  * NOTE: force_not_null option are not applied to the returned fields.
2626  */
2627 bool
2628 NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
2629 {
2630         int                     fldct;
2631         bool            done;
2632
2633         /* only available for text or csv input */
2634         Assert(!cstate->binary);
2635
2636         /* on input just throw the header line away */
2637         if (cstate->cur_lineno == 0 && cstate->header_line)
2638         {
2639                 cstate->cur_lineno++;
2640                 if (CopyReadLine(cstate))
2641                         return false;           /* done */
2642         }
2643
2644         cstate->cur_lineno++;
2645
2646         /* Actually read the line into memory here */
2647         done = CopyReadLine(cstate);
2648
2649         /*
2650          * EOF at start of line means we're done.  If we see EOF after some
2651          * characters, we act as though it was newline followed by EOF, ie,
2652          * process the line and then exit loop on next iteration.
2653          */
2654         if (done && cstate->line_buf.len == 0)
2655                 return false;
2656
2657         /* Parse the line into de-escaped field values */
2658         if (cstate->csv_mode)
2659                 fldct = CopyReadAttributesCSV(cstate);
2660         else
2661                 fldct = CopyReadAttributesText(cstate);
2662
2663         *fields = cstate->raw_fields;
2664         *nfields = fldct;
2665         return true;
2666 }
2667
2668 /*
2669  * Read next tuple from file for COPY FROM. Return false if no more tuples.
2670  *
2671  * 'econtext' is used to evaluate default expression for each columns not
2672  * read from the file. It can be NULL when no default values are used, i.e.
2673  * when all columns are read from the file.
2674  *
2675  * 'values' and 'nulls' arrays must be the same length as columns of the
2676  * relation passed to BeginCopyFrom. This function fills the arrays.
2677  * Oid of the tuple is returned with 'tupleOid' separately.
2678  */
2679 bool
2680 NextCopyFrom(CopyState cstate, ExprContext *econtext,
2681                          Datum *values, bool *nulls, Oid *tupleOid)
2682 {
2683         TupleDesc       tupDesc;
2684         Form_pg_attribute *attr;
2685         AttrNumber      num_phys_attrs,
2686                                 attr_count,
2687                                 num_defaults = cstate->num_defaults;
2688         FmgrInfo   *in_functions = cstate->in_functions;
2689         Oid                *typioparams = cstate->typioparams;
2690         int                     i;
2691         int                     nfields;
2692         bool            isnull;
2693         bool            file_has_oids = cstate->file_has_oids;
2694         int                *defmap = cstate->defmap;
2695         ExprState **defexprs = cstate->defexprs;
2696
2697         tupDesc = RelationGetDescr(cstate->rel);
2698         attr = tupDesc->attrs;
2699         num_phys_attrs = tupDesc->natts;
2700         attr_count = list_length(cstate->attnumlist);
2701         nfields = file_has_oids ? (attr_count + 1) : attr_count;
2702
2703         /* Initialize all values for row to NULL */
2704         MemSet(values, 0, num_phys_attrs * sizeof(Datum));
2705         MemSet(nulls, true, num_phys_attrs * sizeof(bool));
2706
2707         if (!cstate->binary)
2708         {
2709                 char      **field_strings;
2710                 ListCell   *cur;
2711                 int                     fldct;
2712                 int                     fieldno;
2713                 char       *string;
2714
2715                 /* read raw fields in the next line */
2716                 if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
2717                         return false;
2718
2719                 /* check for overflowing fields */
2720                 if (nfields > 0 && fldct > nfields)
2721                         ereport(ERROR,
2722                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2723                                          errmsg("extra data after last expected column")));
2724
2725                 fieldno = 0;
2726
2727                 /* Read the OID field if present */
2728                 if (file_has_oids)
2729                 {
2730                         if (fieldno >= fldct)
2731                                 ereport(ERROR,
2732                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2733                                                  errmsg("missing data for OID column")));
2734                         string = field_strings[fieldno++];
2735
2736                         if (string == NULL)
2737                                 ereport(ERROR,
2738                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2739                                                  errmsg("null OID in COPY data")));
2740                         else if (cstate->oids && tupleOid != NULL)
2741                         {
2742                                 cstate->cur_attname = "oid";
2743                                 cstate->cur_attval = string;
2744                                 *tupleOid = DatumGetObjectId(DirectFunctionCall1(oidin,
2745                                                                                                    CStringGetDatum(string)));
2746                                 if (*tupleOid == InvalidOid)
2747                                         ereport(ERROR,
2748                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2749                                                          errmsg("invalid OID in COPY data")));
2750                                 cstate->cur_attname = NULL;
2751                                 cstate->cur_attval = NULL;
2752                         }
2753                 }
2754
2755                 /* Loop to read the user attributes on the line. */
2756                 foreach(cur, cstate->attnumlist)
2757                 {
2758                         int                     attnum = lfirst_int(cur);
2759                         int                     m = attnum - 1;
2760
2761                         if (fieldno >= fldct)
2762                                 ereport(ERROR,
2763                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2764                                                  errmsg("missing data for column \"%s\"",
2765                                                                 NameStr(attr[m]->attname))));
2766                         string = field_strings[fieldno++];
2767
2768                         if (cstate->convert_select_flags &&
2769                                 !cstate->convert_select_flags[m])
2770                         {
2771                                 /* ignore input field, leaving column as NULL */
2772                                 continue;
2773                         }
2774
2775                         if (cstate->csv_mode && string == NULL &&
2776                                 cstate->force_notnull_flags[m])
2777                         {
2778                                 /* Go ahead and read the NULL string */
2779                                 string = cstate->null_print;
2780                         }
2781
2782                         cstate->cur_attname = NameStr(attr[m]->attname);
2783                         cstate->cur_attval = string;
2784                         values[m] = InputFunctionCall(&in_functions[m],
2785                                                                                   string,
2786                                                                                   typioparams[m],
2787                                                                                   attr[m]->atttypmod);
2788                         if (string != NULL)
2789                                 nulls[m] = false;
2790                         cstate->cur_attname = NULL;
2791                         cstate->cur_attval = NULL;
2792                 }
2793
2794                 Assert(fieldno == nfields);
2795         }
2796         else
2797         {
2798                 /* binary */
2799                 int16           fld_count;
2800                 ListCell   *cur;
2801
2802                 cstate->cur_lineno++;
2803
2804                 if (!CopyGetInt16(cstate, &fld_count))
2805                 {
2806                         /* EOF detected (end of file, or protocol-level EOF) */
2807                         return false;
2808                 }
2809
2810                 if (fld_count == -1)
2811                 {
2812                         /*
2813                          * Received EOF marker.  In a V3-protocol copy, wait for the
2814                          * protocol-level EOF, and complain if it doesn't come
2815                          * immediately.  This ensures that we correctly handle CopyFail,
2816                          * if client chooses to send that now.
2817                          *
2818                          * Note that we MUST NOT try to read more data in an old-protocol
2819                          * copy, since there is no protocol-level EOF marker then.      We
2820                          * could go either way for copy from file, but choose to throw
2821                          * error if there's data after the EOF marker, for consistency
2822                          * with the new-protocol case.
2823                          */
2824                         char            dummy;
2825
2826                         if (cstate->copy_dest != COPY_OLD_FE &&
2827                                 CopyGetData(cstate, &dummy, 1, 1) > 0)
2828                                 ereport(ERROR,
2829                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2830                                                  errmsg("received copy data after EOF marker")));
2831                         return false;
2832                 }
2833
2834                 if (fld_count != attr_count)
2835                         ereport(ERROR,
2836                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2837                                          errmsg("row field count is %d, expected %d",
2838                                                         (int) fld_count, attr_count)));
2839
2840                 if (file_has_oids)
2841                 {
2842                         Oid                     loaded_oid;
2843
2844                         cstate->cur_attname = "oid";
2845                         loaded_oid =
2846                                 DatumGetObjectId(CopyReadBinaryAttribute(cstate,
2847                                                                                                                  0,
2848                                                                                                         &cstate->oid_in_function,
2849                                                                                                           cstate->oid_typioparam,
2850                                                                                                                  -1,
2851                                                                                                                  &isnull));
2852                         if (isnull || loaded_oid == InvalidOid)
2853                                 ereport(ERROR,
2854                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2855                                                  errmsg("invalid OID in COPY data")));
2856                         cstate->cur_attname = NULL;
2857                         if (cstate->oids && tupleOid != NULL)
2858                                 *tupleOid = loaded_oid;
2859                 }
2860
2861                 i = 0;
2862                 foreach(cur, cstate->attnumlist)
2863                 {
2864                         int                     attnum = lfirst_int(cur);
2865                         int                     m = attnum - 1;
2866
2867                         cstate->cur_attname = NameStr(attr[m]->attname);
2868                         i++;
2869                         values[m] = CopyReadBinaryAttribute(cstate,
2870                                                                                                 i,
2871                                                                                                 &in_functions[m],
2872                                                                                                 typioparams[m],
2873                                                                                                 attr[m]->atttypmod,
2874                                                                                                 &nulls[m]);
2875                         cstate->cur_attname = NULL;
2876                 }
2877         }
2878
2879         /*
2880          * Now compute and insert any defaults available for the columns not
2881          * provided by the input data.  Anything not processed here or above will
2882          * remain NULL.
2883          */
2884         for (i = 0; i < num_defaults; i++)
2885         {
2886                 /*
2887                  * The caller must supply econtext and have switched into the
2888                  * per-tuple memory context in it.
2889                  */
2890                 Assert(econtext != NULL);
2891                 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
2892
2893                 values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
2894                                                                                  &nulls[defmap[i]], NULL);
2895         }
2896
2897         return true;
2898 }
2899
2900 /*
2901  * Clean up storage and release resources for COPY FROM.
2902  */
2903 void
2904 EndCopyFrom(CopyState cstate)
2905 {
2906         /* No COPY FROM related resources except memory. */
2907
2908         EndCopy(cstate);
2909 }
2910
2911 /*
2912  * Read the next input line and stash it in line_buf, with conversion to
2913  * server encoding.
2914  *
2915  * Result is true if read was terminated by EOF, false if terminated
2916  * by newline.  The terminating newline or EOF marker is not included
2917  * in the final value of line_buf.
2918  */
2919 static bool
2920 CopyReadLine(CopyState cstate)
2921 {
2922         bool            result;
2923
2924         resetStringInfo(&cstate->line_buf);
2925
2926         /* Mark that encoding conversion hasn't occurred yet */
2927         cstate->line_buf_converted = false;
2928
2929         /* Parse data and transfer into line_buf */
2930         result = CopyReadLineText(cstate);
2931
2932         if (result)
2933         {
2934                 /*
2935                  * Reached EOF.  In protocol version 3, we should ignore anything
2936                  * after \. up to the protocol end of copy data.  (XXX maybe better
2937                  * not to treat \. as special?)
2938                  */
2939                 if (cstate->copy_dest == COPY_NEW_FE)
2940                 {
2941                         do
2942                         {
2943                                 cstate->raw_buf_index = cstate->raw_buf_len;
2944                         } while (CopyLoadRawBuf(cstate));
2945                 }
2946         }
2947         else
2948         {
2949                 /*
2950                  * If we didn't hit EOF, then we must have transferred the EOL marker
2951                  * to line_buf along with the data.  Get rid of it.
2952                  */
2953                 switch (cstate->eol_type)
2954                 {
2955                         case EOL_NL:
2956                                 Assert(cstate->line_buf.len >= 1);
2957                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2958                                 cstate->line_buf.len--;
2959                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2960                                 break;
2961                         case EOL_CR:
2962                                 Assert(cstate->line_buf.len >= 1);
2963                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
2964                                 cstate->line_buf.len--;
2965                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2966                                 break;
2967                         case EOL_CRNL:
2968                                 Assert(cstate->line_buf.len >= 2);
2969                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
2970                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2971                                 cstate->line_buf.len -= 2;
2972                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2973                                 break;
2974                         case EOL_UNKNOWN:
2975                                 /* shouldn't get here */
2976                                 Assert(false);
2977                                 break;
2978                 }
2979         }
2980
2981         /* Done reading the line.  Convert it to server encoding. */
2982         if (cstate->need_transcoding)
2983         {
2984                 char       *cvt;
2985
2986                 cvt = pg_any_to_server(cstate->line_buf.data,
2987                                                            cstate->line_buf.len,
2988                                                            cstate->file_encoding);
2989                 if (cvt != cstate->line_buf.data)
2990                 {
2991                         /* transfer converted data back to line_buf */
2992                         resetStringInfo(&cstate->line_buf);
2993                         appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
2994                         pfree(cvt);
2995                 }
2996         }
2997
2998         /* Now it's safe to use the buffer in error messages */
2999         cstate->line_buf_converted = true;
3000
3001         return result;
3002 }
3003
3004 /*
3005  * CopyReadLineText - inner loop of CopyReadLine for text mode
3006  */
3007 static bool
3008 CopyReadLineText(CopyState cstate)
3009 {
3010         char       *copy_raw_buf;
3011         int                     raw_buf_ptr;
3012         int                     copy_buf_len;
3013         bool            need_data = false;
3014         bool            hit_eof = false;
3015         bool            result = false;
3016         char            mblen_str[2];
3017
3018         /* CSV variables */
3019         bool            first_char_in_line = true;
3020         bool            in_quote = false,
3021                                 last_was_esc = false;
3022         char            quotec = '\0';
3023         char            escapec = '\0';
3024
3025         if (cstate->csv_mode)
3026         {
3027                 quotec = cstate->quote[0];
3028                 escapec = cstate->escape[0];
3029                 /* ignore special escape processing if it's the same as quotec */
3030                 if (quotec == escapec)
3031                         escapec = '\0';
3032         }
3033
3034         mblen_str[1] = '\0';
3035
3036         /*
3037          * The objective of this loop is to transfer the entire next input line
3038          * into line_buf.  Hence, we only care for detecting newlines (\r and/or
3039          * \n) and the end-of-copy marker (\.).
3040          *
3041          * In CSV mode, \r and \n inside a quoted field are just part of the data
3042          * value and are put in line_buf.  We keep just enough state to know if we
3043          * are currently in a quoted field or not.
3044          *
3045          * These four characters, and the CSV escape and quote characters, are
3046          * assumed the same in frontend and backend encodings.
3047          *
3048          * For speed, we try to move data from raw_buf to line_buf in chunks
3049          * rather than one character at a time.  raw_buf_ptr points to the next
3050          * character to examine; any characters from raw_buf_index to raw_buf_ptr
3051          * have been determined to be part of the line, but not yet transferred to
3052          * line_buf.
3053          *
3054          * For a little extra speed within the loop, we copy raw_buf and
3055          * raw_buf_len into local variables.
3056          */
3057         copy_raw_buf = cstate->raw_buf;
3058         raw_buf_ptr = cstate->raw_buf_index;
3059         copy_buf_len = cstate->raw_buf_len;
3060
3061         for (;;)
3062         {
3063                 int                     prev_raw_ptr;
3064                 char            c;
3065
3066                 /*
3067                  * Load more data if needed.  Ideally we would just force four bytes
3068                  * of read-ahead and avoid the many calls to
3069                  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
3070                  * does not allow us to read too far ahead or we might read into the
3071                  * next data, so we read-ahead only as far we know we can.      One
3072                  * optimization would be to read-ahead four byte here if
3073                  * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
3074                  * considering the size of the buffer.
3075                  */
3076                 if (raw_buf_ptr >= copy_buf_len || need_data)
3077                 {
3078                         REFILL_LINEBUF;
3079
3080                         /*
3081                          * Try to read some more data.  This will certainly reset
3082                          * raw_buf_index to zero, and raw_buf_ptr must go with it.
3083                          */
3084                         if (!CopyLoadRawBuf(cstate))
3085                                 hit_eof = true;
3086                         raw_buf_ptr = 0;
3087                         copy_buf_len = cstate->raw_buf_len;
3088
3089                         /*
3090                          * If we are completely out of data, break out of the loop,
3091                          * reporting EOF.
3092                          */
3093                         if (copy_buf_len <= 0)
3094                         {
3095                                 result = true;
3096                                 break;
3097                         }
3098                         need_data = false;
3099                 }
3100
3101                 /* OK to fetch a character */
3102                 prev_raw_ptr = raw_buf_ptr;
3103                 c = copy_raw_buf[raw_buf_ptr++];
3104
3105                 if (cstate->csv_mode)
3106                 {
3107                         /*
3108                          * If character is '\\' or '\r', we may need to look ahead below.
3109                          * Force fetch of the next character if we don't already have it.
3110                          * We need to do this before changing CSV state, in case one of
3111                          * these characters is also the quote or escape character.
3112                          *
3113                          * Note: old-protocol does not like forced prefetch, but it's OK
3114                          * here since we cannot validly be at EOF.
3115                          */
3116                         if (c == '\\' || c == '\r')
3117                         {
3118                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3119                         }
3120
3121                         /*
3122                          * Dealing with quotes and escapes here is mildly tricky. If the
3123                          * quote char is also the escape char, there's no problem - we
3124                          * just use the char as a toggle. If they are different, we need
3125                          * to ensure that we only take account of an escape inside a
3126                          * quoted field and immediately preceding a quote char, and not
3127                          * the second in a escape-escape sequence.
3128                          */
3129                         if (in_quote && c == escapec)
3130                                 last_was_esc = !last_was_esc;
3131                         if (c == quotec && !last_was_esc)
3132                                 in_quote = !in_quote;
3133                         if (c != escapec)
3134                                 last_was_esc = false;
3135
3136                         /*
3137                          * Updating the line count for embedded CR and/or LF chars is
3138                          * necessarily a little fragile - this test is probably about the
3139                          * best we can do.      (XXX it's arguable whether we should do this
3140                          * at all --- is cur_lineno a physical or logical count?)
3141                          */
3142                         if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
3143                                 cstate->cur_lineno++;
3144                 }
3145
3146                 /* Process \r */
3147                 if (c == '\r' && (!cstate->csv_mode || !in_quote))
3148                 {
3149                         /* Check for \r\n on first line, _and_ handle \r\n. */
3150                         if (cstate->eol_type == EOL_UNKNOWN ||
3151                                 cstate->eol_type == EOL_CRNL)
3152                         {
3153                                 /*
3154                                  * If need more data, go back to loop top to load it.
3155                                  *
3156                                  * Note that if we are at EOF, c will wind up as '\0' because
3157                                  * of the guaranteed pad of raw_buf.
3158                                  */
3159                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3160
3161                                 /* get next char */
3162                                 c = copy_raw_buf[raw_buf_ptr];
3163
3164                                 if (c == '\n')
3165                                 {
3166                                         raw_buf_ptr++;          /* eat newline */
3167                                         cstate->eol_type = EOL_CRNL;            /* in case not set yet */
3168                                 }
3169                                 else
3170                                 {
3171                                         /* found \r, but no \n */
3172                                         if (cstate->eol_type == EOL_CRNL)
3173                                                 ereport(ERROR,
3174                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3175                                                                  !cstate->csv_mode ?
3176                                                         errmsg("literal carriage return found in data") :
3177                                                         errmsg("unquoted carriage return found in data"),
3178                                                                  !cstate->csv_mode ?
3179                                                 errhint("Use \"\\r\" to represent carriage return.") :
3180                                                                  errhint("Use quoted CSV field to represent carriage return.")));
3181
3182                                         /*
3183                                          * if we got here, it is the first line and we didn't find
3184                                          * \n, so don't consume the peeked character
3185                                          */
3186                                         cstate->eol_type = EOL_CR;
3187                                 }
3188                         }
3189                         else if (cstate->eol_type == EOL_NL)
3190                                 ereport(ERROR,
3191                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3192                                                  !cstate->csv_mode ?
3193                                                  errmsg("literal carriage return found in data") :
3194                                                  errmsg("unquoted carriage return found in data"),
3195                                                  !cstate->csv_mode ?
3196                                            errhint("Use \"\\r\" to represent carriage return.") :
3197                                                  errhint("Use quoted CSV field to represent carriage return.")));
3198                         /* If reach here, we have found the line terminator */
3199                         break;
3200                 }
3201
3202                 /* Process \n */
3203                 if (c == '\n' && (!cstate->csv_mode || !in_quote))
3204                 {
3205                         if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
3206                                 ereport(ERROR,
3207                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3208                                                  !cstate->csv_mode ?
3209                                                  errmsg("literal newline found in data") :
3210                                                  errmsg("unquoted newline found in data"),
3211                                                  !cstate->csv_mode ?
3212                                                  errhint("Use \"\\n\" to represent newline.") :
3213                                          errhint("Use quoted CSV field to represent newline.")));
3214                         cstate->eol_type = EOL_NL;      /* in case not set yet */
3215                         /* If reach here, we have found the line terminator */
3216                         break;
3217                 }
3218
3219                 /*
3220                  * In CSV mode, we only recognize \. alone on a line.  This is because
3221                  * \. is a valid CSV data value.
3222                  */
3223                 if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
3224                 {
3225                         char            c2;
3226
3227                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3228                         IF_NEED_REFILL_AND_EOF_BREAK(0);
3229
3230                         /* -----
3231                          * get next character
3232                          * Note: we do not change c so if it isn't \., we can fall
3233                          * through and continue processing for file encoding.
3234                          * -----
3235                          */
3236                         c2 = copy_raw_buf[raw_buf_ptr];
3237
3238                         if (c2 == '.')
3239                         {
3240                                 raw_buf_ptr++;  /* consume the '.' */
3241
3242                                 /*
3243                                  * Note: if we loop back for more data here, it does not
3244                                  * matter that the CSV state change checks are re-executed; we
3245                                  * will come back here with no important state changed.
3246                                  */
3247                                 if (cstate->eol_type == EOL_CRNL)
3248                                 {
3249                                         /* Get the next character */
3250                                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3251                                         /* if hit_eof, c2 will become '\0' */
3252                                         c2 = copy_raw_buf[raw_buf_ptr++];
3253
3254                                         if (c2 == '\n')
3255                                         {
3256                                                 if (!cstate->csv_mode)
3257                                                         ereport(ERROR,
3258                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3259                                                                          errmsg("end-of-copy marker does not match previous newline style")));
3260                                                 else
3261                                                         NO_END_OF_COPY_GOTO;
3262                                         }
3263                                         else if (c2 != '\r')
3264                                         {
3265                                                 if (!cstate->csv_mode)
3266                                                         ereport(ERROR,
3267                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3268                                                                          errmsg("end-of-copy marker corrupt")));
3269                                                 else
3270                                                         NO_END_OF_COPY_GOTO;
3271                                         }
3272                                 }
3273
3274                                 /* Get the next character */
3275                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3276                                 /* if hit_eof, c2 will become '\0' */
3277                                 c2 = copy_raw_buf[raw_buf_ptr++];
3278
3279                                 if (c2 != '\r' && c2 != '\n')
3280                                 {
3281                                         if (!cstate->csv_mode)
3282                                                 ereport(ERROR,
3283                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3284                                                                  errmsg("end-of-copy marker corrupt")));
3285                                         else
3286                                                 NO_END_OF_COPY_GOTO;
3287                                 }
3288
3289                                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
3290                                         (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
3291                                         (cstate->eol_type == EOL_CR && c2 != '\r'))
3292                                 {
3293                                         ereport(ERROR,
3294                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3295                                                          errmsg("end-of-copy marker does not match previous newline style")));
3296                                 }
3297
3298                                 /*
3299                                  * Transfer only the data before the \. into line_buf, then
3300                                  * discard the data and the \. sequence.
3301                                  */
3302                                 if (prev_raw_ptr > cstate->raw_buf_index)
3303                                         appendBinaryStringInfo(&cstate->line_buf,
3304                                                                          cstate->raw_buf + cstate->raw_buf_index,
3305                                                                            prev_raw_ptr - cstate->raw_buf_index);
3306                                 cstate->raw_buf_index = raw_buf_ptr;
3307                                 result = true;  /* report EOF */
3308                                 break;
3309                         }
3310                         else if (!cstate->csv_mode)
3311
3312                                 /*
3313                                  * If we are here, it means we found a backslash followed by
3314                                  * something other than a period.  In non-CSV mode, anything
3315                                  * after a backslash is special, so we skip over that second
3316                                  * character too.  If we didn't do that \\. would be
3317                                  * considered an eof-of copy, while in non-CSV mode it is a
3318                                  * literal backslash followed by a period.      In CSV mode,
3319                                  * backslashes are not special, so we want to process the
3320                                  * character after the backslash just like a normal character,
3321                                  * so we don't increment in those cases.
3322                                  */
3323                                 raw_buf_ptr++;
3324                 }
3325
3326                 /*
3327                  * This label is for CSV cases where \. appears at the start of a
3328                  * line, but there is more text after it, meaning it was a data value.
3329                  * We are more strict for \. in CSV mode because \. could be a data
3330                  * value, while in non-CSV mode, \. cannot be a data value.
3331                  */
3332 not_end_of_copy:
3333
3334                 /*
3335                  * Process all bytes of a multi-byte character as a group.
3336                  *
3337                  * We only support multi-byte sequences where the first byte has the
3338                  * high-bit set, so as an optimization we can avoid this block
3339                  * entirely if it is not set.
3340                  */
3341                 if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
3342                 {
3343                         int                     mblen;
3344
3345                         mblen_str[0] = c;
3346                         /* All our encodings only read the first byte to get the length */
3347                         mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
3348                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
3349                         IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
3350                         raw_buf_ptr += mblen - 1;
3351                 }
3352                 first_char_in_line = false;
3353         }                                                       /* end of outer loop */
3354
3355         /*
3356          * Transfer any still-uncopied data to line_buf.
3357          */
3358         REFILL_LINEBUF;
3359
3360         return result;
3361 }
3362
/*
 * GetDecimalFromHex
 *		Map a single hexadecimal digit character to its numeric value.
 *
 * '0'..'9' yield 0..9; any other character is lower-cased and measured
 * from 'a', so 'a'..'f' and 'A'..'F' yield 10..15.  No validation is done
 * here: the result is only meaningful when the argument really is a hex
 * digit (the visible callers test isxdigit() before calling).
 */
static int
GetDecimalFromHex(char hex)
{
	unsigned char uch = (unsigned char) hex;

	return isdigit(uch) ? (hex - '0') : (tolower(uch) - 'a' + 10);
}
3374
3375 /*
3376  * Parse the current line into separate attributes (fields),
3377  * performing de-escaping as needed.
3378  *
3379  * The input is in line_buf.  We use attribute_buf to hold the result
3380  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
3381  * string, or NULL when the input matches the null marker string.
3382  * This array is expanded as necessary.
3383  *
3384  * (Note that the caller cannot check for nulls since the returned
3385  * string would be the post-de-escaping equivalent, which may look
3386  * the same as some valid data string.)
3387  *
3388  * delim is the column delimiter string (must be just one byte for now).
3389  * null_print is the null marker string.  Note that this is compared to
3390  * the pre-de-escaped input string.
3391  *
3392  * The return value is the number of fields actually read.
3393  */
3394 static int
3395 CopyReadAttributesText(CopyState cstate)
3396 {
3397         char            delimc = cstate->delim[0];
3398         int                     fieldno;
3399         char       *output_ptr;
3400         char       *cur_ptr;
3401         char       *line_end_ptr;
3402
3403         /*
3404          * We need a special case for zero-column tables: check that the input
3405          * line is empty, and return.
3406          */
3407         if (cstate->max_fields <= 0)
3408         {
3409                 if (cstate->line_buf.len != 0)
3410                         ereport(ERROR,
3411                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3412                                          errmsg("extra data after last expected column")));
3413                 return 0;
3414         }
3415
3416         resetStringInfo(&cstate->attribute_buf);
3417
3418         /*
3419          * The de-escaped attributes will certainly not be longer than the input
3420          * data line, so we can just force attribute_buf to be large enough and
3421          * then transfer data without any checks for enough space.      We need to do
3422          * it this way because enlarging attribute_buf mid-stream would invalidate
3423          * pointers already stored into cstate->raw_fields[].
3424          */
3425         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3426                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3427         output_ptr = cstate->attribute_buf.data;
3428
3429         /* set pointer variables for loop */
3430         cur_ptr = cstate->line_buf.data;
3431         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3432
3433         /* Outer loop iterates over fields */
3434         fieldno = 0;
3435         for (;;)
3436         {
3437                 bool            found_delim = false;
3438                 char       *start_ptr;
3439                 char       *end_ptr;
3440                 int                     input_len;
3441                 bool            saw_non_ascii = false;
3442
3443                 /* Make sure there is enough space for the next value */
3444                 if (fieldno >= cstate->max_fields)
3445                 {
3446                         cstate->max_fields *= 2;
3447                         cstate->raw_fields =
3448                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3449                 }
3450
3451                 /* Remember start of field on both input and output sides */
3452                 start_ptr = cur_ptr;
3453                 cstate->raw_fields[fieldno] = output_ptr;
3454
3455                 /*
3456                  * Scan data for field.
3457                  *
3458                  * Note that in this loop, we are scanning to locate the end of field
3459                  * and also speculatively performing de-escaping.  Once we find the
3460                  * end-of-field, we can match the raw field contents against the null
3461                  * marker string.  Only after that comparison fails do we know that
3462                  * de-escaping is actually the right thing to do; therefore we *must
3463                  * not* throw any syntax errors before we've done the null-marker
3464                  * check.
3465                  */
3466                 for (;;)
3467                 {
3468                         char            c;
3469
3470                         end_ptr = cur_ptr;
3471                         if (cur_ptr >= line_end_ptr)
3472                                 break;
3473                         c = *cur_ptr++;
3474                         if (c == delimc)
3475                         {
3476                                 found_delim = true;
3477                                 break;
3478                         }
3479                         if (c == '\\')
3480                         {
3481                                 if (cur_ptr >= line_end_ptr)
3482                                         break;
3483                                 c = *cur_ptr++;
3484                                 switch (c)
3485                                 {
3486                                         case '0':
3487                                         case '1':
3488                                         case '2':
3489                                         case '3':
3490                                         case '4':
3491                                         case '5':
3492                                         case '6':
3493                                         case '7':
3494                                                 {
3495                                                         /* handle \013 */
3496                                                         int                     val;
3497
3498                                                         val = OCTVALUE(c);
3499                                                         if (cur_ptr < line_end_ptr)
3500                                                         {
3501                                                                 c = *cur_ptr;
3502                                                                 if (ISOCTAL(c))
3503                                                                 {
3504                                                                         cur_ptr++;
3505                                                                         val = (val << 3) + OCTVALUE(c);
3506                                                                         if (cur_ptr < line_end_ptr)
3507                                                                         {
3508                                                                                 c = *cur_ptr;
3509                                                                                 if (ISOCTAL(c))
3510                                                                                 {
3511                                                                                         cur_ptr++;
3512                                                                                         val = (val << 3) + OCTVALUE(c);
3513                                                                                 }
3514                                                                         }
3515                                                                 }
3516                                                         }
3517                                                         c = val & 0377;
3518                                                         if (c == '\0' || IS_HIGHBIT_SET(c))
3519                                                                 saw_non_ascii = true;
3520                                                 }
3521                                                 break;
3522                                         case 'x':
3523                                                 /* Handle \x3F */
3524                                                 if (cur_ptr < line_end_ptr)
3525                                                 {
3526                                                         char            hexchar = *cur_ptr;
3527
3528                                                         if (isxdigit((unsigned char) hexchar))
3529                                                         {
3530                                                                 int                     val = GetDecimalFromHex(hexchar);
3531
3532                                                                 cur_ptr++;
3533                                                                 if (cur_ptr < line_end_ptr)
3534                                                                 {
3535                                                                         hexchar = *cur_ptr;
3536                                                                         if (isxdigit((unsigned char) hexchar))
3537                                                                         {
3538                                                                                 cur_ptr++;
3539                                                                                 val = (val << 4) + GetDecimalFromHex(hexchar);
3540                                                                         }
3541                                                                 }
3542                                                                 c = val & 0xff;
3543                                                                 if (c == '\0' || IS_HIGHBIT_SET(c))
3544                                                                         saw_non_ascii = true;
3545                                                         }
3546                                                 }
3547                                                 break;
3548                                         case 'b':
3549                                                 c = '\b';
3550                                                 break;
3551                                         case 'f':
3552                                                 c = '\f';
3553                                                 break;
3554                                         case 'n':
3555                                                 c = '\n';
3556                                                 break;
3557                                         case 'r':
3558                                                 c = '\r';
3559                                                 break;
3560                                         case 't':
3561                                                 c = '\t';
3562                                                 break;
3563                                         case 'v':
3564                                                 c = '\v';
3565                                                 break;
3566
3567                                                 /*
3568                                                  * in all other cases, take the char after '\'
3569                                                  * literally
3570                                                  */
3571                                 }
3572                         }
3573
3574                         /* Add c to output string */
3575                         *output_ptr++ = c;
3576                 }
3577
3578                 /* Check whether raw input matched null marker */
3579                 input_len = end_ptr - start_ptr;
3580                 if (input_len == cstate->null_print_len &&
3581                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3582                         cstate->raw_fields[fieldno] = NULL;
3583                 else
3584                 {
3585                         /*
3586                          * At this point we know the field is supposed to contain data.
3587                          *
3588                          * If we de-escaped any non-7-bit-ASCII chars, make sure the
3589                          * resulting string is valid data for the db encoding.
3590                          */
3591                         if (saw_non_ascii)
3592                         {
3593                                 char       *fld = cstate->raw_fields[fieldno];
3594
3595                                 pg_verifymbstr(fld, output_ptr - fld, false);
3596                         }
3597                 }
3598
3599                 /* Terminate attribute value in output area */
3600                 *output_ptr++ = '\0';
3601
3602                 fieldno++;
3603                 /* Done if we hit EOL instead of a delim */
3604                 if (!found_delim)
3605                         break;
3606         }
3607
3608         /* Clean up state of attribute_buf */
3609         output_ptr--;
3610         Assert(*output_ptr == '\0');
3611         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3612
3613         return fieldno;
3614 }
3615
3616 /*
3617  * Parse the current line into separate attributes (fields),
3618  * performing de-escaping as needed.  This has exactly the same API as
3619  * CopyReadAttributesText, except we parse the fields according to
3620  * "standard" (i.e. common) CSV usage.
3621  */
3622 static int
3623 CopyReadAttributesCSV(CopyState cstate)
3624 {
3625         char            delimc = cstate->delim[0];
3626         char            quotec = cstate->quote[0];
3627         char            escapec = cstate->escape[0];
3628         int                     fieldno;
3629         char       *output_ptr;
3630         char       *cur_ptr;
3631         char       *line_end_ptr;
3632
3633         /*
3634          * We need a special case for zero-column tables: check that the input
3635          * line is empty, and return.
3636          */
3637         if (cstate->max_fields <= 0)
3638         {
3639                 if (cstate->line_buf.len != 0)
3640                         ereport(ERROR,
3641                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3642                                          errmsg("extra data after last expected column")));
3643                 return 0;
3644         }
3645
3646         resetStringInfo(&cstate->attribute_buf);
3647
3648         /*
3649          * The de-escaped attributes will certainly not be longer than the input
3650          * data line, so we can just force attribute_buf to be large enough and
3651          * then transfer data without any checks for enough space.      We need to do
3652          * it this way because enlarging attribute_buf mid-stream would invalidate
3653          * pointers already stored into cstate->raw_fields[].
3654          */
3655         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3656                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3657         output_ptr = cstate->attribute_buf.data;
3658
3659         /* set pointer variables for loop */
3660         cur_ptr = cstate->line_buf.data;
3661         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3662
3663         /* Outer loop iterates over fields */
3664         fieldno = 0;
3665         for (;;)
3666         {
3667                 bool            found_delim = false;
3668                 bool            saw_quote = false;
3669                 char       *start_ptr;
3670                 char       *end_ptr;
3671                 int                     input_len;
3672
3673                 /* Make sure there is enough space for the next value */
3674                 if (fieldno >= cstate->max_fields)
3675                 {
3676                         cstate->max_fields *= 2;
3677                         cstate->raw_fields =
3678                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3679                 }
3680
3681                 /* Remember start of field on both input and output sides */
3682                 start_ptr = cur_ptr;
3683                 cstate->raw_fields[fieldno] = output_ptr;
3684
3685                 /*
3686                  * Scan data for field,
3687                  *
3688                  * The loop starts in "not quote" mode and then toggles between that
3689                  * and "in quote" mode. The loop exits normally if it is in "not
3690                  * quote" mode and a delimiter or line end is seen.
3691                  */
3692                 for (;;)
3693                 {
3694                         char            c;
3695
3696                         /* Not in quote */
3697                         for (;;)
3698                         {
3699                                 end_ptr = cur_ptr;
3700                                 if (cur_ptr >= line_end_ptr)
3701                                         goto endfield;
3702                                 c = *cur_ptr++;
3703                                 /* unquoted field delimiter */
3704                                 if (c == delimc)
3705                                 {
3706                                         found_delim = true;
3707                                         goto endfield;
3708                                 }
3709                                 /* start of quoted field (or part of field) */
3710                                 if (c == quotec)
3711                                 {
3712                                         saw_quote = true;
3713                                         break;
3714                                 }
3715                                 /* Add c to output string */
3716                                 *output_ptr++ = c;
3717                         }
3718
3719                         /* In quote */
3720                         for (;;)
3721                         {
3722                                 end_ptr = cur_ptr;
3723                                 if (cur_ptr >= line_end_ptr)
3724                                         ereport(ERROR,
3725                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3726                                                          errmsg("unterminated CSV quoted field")));
3727
3728                                 c = *cur_ptr++;
3729
3730                                 /* escape within a quoted field */
3731                                 if (c == escapec)
3732                                 {
3733                                         /*
3734                                          * peek at the next char if available, and escape it if it
3735                                          * is an escape char or a quote char
3736                                          */
3737                                         if (cur_ptr < line_end_ptr)
3738                                         {
3739                                                 char            nextc = *cur_ptr;
3740
3741                                                 if (nextc == escapec || nextc == quotec)
3742                                                 {
3743                                                         *output_ptr++ = nextc;
3744                                                         cur_ptr++;
3745                                                         continue;
3746                                                 }
3747                                         }
3748                                 }
3749
3750                                 /*
3751                                  * end of quoted field. Must do this test after testing for
3752                                  * escape in case quote char and escape char are the same
3753                                  * (which is the common case).
3754                                  */
3755                                 if (c == quotec)
3756                                         break;
3757
3758                                 /* Add c to output string */
3759                                 *output_ptr++ = c;
3760                         }
3761                 }
3762 endfield:
3763
3764                 /* Terminate attribute value in output area */
3765                 *output_ptr++ = '\0';
3766
3767                 /* Check whether raw input matched null marker */
3768                 input_len = end_ptr - start_ptr;
3769                 if (!saw_quote && input_len == cstate->null_print_len &&
3770                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3771                         cstate->raw_fields[fieldno] = NULL;
3772
3773                 fieldno++;
3774                 /* Done if we hit EOL instead of a delim */
3775                 if (!found_delim)
3776                         break;
3777         }
3778
3779         /* Clean up state of attribute_buf */
3780         output_ptr--;
3781         Assert(*output_ptr == '\0');
3782         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3783
3784         return fieldno;
3785 }
3786
3787
3788 /*
3789  * Read a binary attribute
3790  */
3791 static Datum
3792 CopyReadBinaryAttribute(CopyState cstate,
3793                                                 int column_no, FmgrInfo *flinfo,
3794                                                 Oid typioparam, int32 typmod,
3795                                                 bool *isnull)
3796 {
3797         int32           fld_size;
3798         Datum           result;
3799
3800         if (!CopyGetInt32(cstate, &fld_size))
3801                 ereport(ERROR,
3802                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3803                                  errmsg("unexpected EOF in COPY data")));
3804         if (fld_size == -1)
3805         {
3806                 *isnull = true;
3807                 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
3808         }
3809         if (fld_size < 0)
3810                 ereport(ERROR,
3811                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3812                                  errmsg("invalid field size")));
3813
3814         /* reset attribute_buf to empty, and load raw data in it */
3815         resetStringInfo(&cstate->attribute_buf);
3816
3817         enlargeStringInfo(&cstate->attribute_buf, fld_size);
3818         if (CopyGetData(cstate, cstate->attribute_buf.data,
3819                                         fld_size, fld_size) != fld_size)
3820                 ereport(ERROR,
3821                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3822                                  errmsg("unexpected EOF in COPY data")));
3823
3824         cstate->attribute_buf.len = fld_size;
3825         cstate->attribute_buf.data[fld_size] = '\0';
3826
3827         /* Call the column type's binary input converter */
3828         result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
3829                                                                  typioparam, typmod);
3830
3831         /* Trouble if it didn't eat the whole buffer */
3832         if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
3833                 ereport(ERROR,
3834                                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
3835                                  errmsg("incorrect binary data format")));
3836
3837         *isnull = false;
3838         return result;
3839 }
3840
3841 /*
3842  * Send text representation of one attribute, with conversion and escaping
3843  */
/*
 * DUMPSOFAR -- send the pending run of literal bytes.
 *
 * Must be expanded where "cstate", "start" and "ptr" are in scope.  If ptr
 * lies beyond start, the bytes in [start, ptr) are passed to CopySendData.
 * Neither pointer is changed, so the user of the macro resets "start".
 */
#define DUMPSOFAR() \
	do { \
		if (start < ptr) \
			CopySendData(cstate, start, ptr - start); \
	} while (0)
3849
3850 static void
3851 CopyAttributeOutText(CopyState cstate, char *string)
3852 {
3853         char       *ptr;
3854         char       *start;
3855         char            c;
3856         char            delimc = cstate->delim[0];
3857
3858         if (cstate->need_transcoding)
3859                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
3860         else
3861                 ptr = string;
3862
3863         /*
3864          * We have to grovel through the string searching for control characters
3865          * and instances of the delimiter character.  In most cases, though, these
3866          * are infrequent.      To avoid overhead from calling CopySendData once per
3867          * character, we dump out all characters between escaped characters in a
3868          * single call.  The loop invariant is that the data from "start" to "ptr"
3869          * can be sent literally, but hasn't yet been.
3870          *
3871          * We can skip pg_encoding_mblen() overhead when encoding is safe, because
3872          * in valid backend encodings, extra bytes of a multibyte character never
3873          * look like ASCII.  This loop is sufficiently performance-critical that
3874          * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
3875          * of the normal safe-encoding path.
3876          */
3877         if (cstate->encoding_embeds_ascii)
3878         {
3879                 start = ptr;
3880                 while ((c = *ptr) != '\0')
3881                 {
3882                         if ((unsigned char) c < (unsigned char) 0x20)
3883                         {
3884                                 /*
3885                                  * \r and \n must be escaped, the others are traditional. We
3886                                  * prefer to dump these using the C-like notation, rather than
3887                                  * a backslash and the literal character, because it makes the
3888                                  * dump file a bit more proof against Microsoftish data
3889                                  * mangling.
3890                                  */
3891                                 switch (c)
3892                                 {
3893                                         case '\b':
3894                                                 c = 'b';
3895                                                 break;
3896                                         case '\f':
3897                                                 c = 'f';
3898                                                 break;
3899                                         case '\n':
3900                                                 c = 'n';
3901                                                 break;
3902                                         case '\r':
3903                                                 c = 'r';
3904                                                 break;
3905                                         case '\t':
3906                                                 c = 't';
3907                                                 break;
3908                                         case '\v':
3909                                                 c = 'v';
3910                                                 break;
3911                                         default:
3912                                                 /* If it's the delimiter, must backslash it */
3913                                                 if (c == delimc)
3914                                                         break;
3915                                                 /* All ASCII control chars are length 1 */
3916                                                 ptr++;
3917                                                 continue;               /* fall to end of loop */
3918                                 }
3919                                 /* if we get here, we need to convert the control char */
3920                                 DUMPSOFAR();
3921                                 CopySendChar(cstate, '\\');
3922                                 CopySendChar(cstate, c);
3923                                 start = ++ptr;  /* do not include char in next run */
3924                         }
3925                         else if (c == '\\' || c == delimc)
3926                         {
3927                                 DUMPSOFAR();
3928                                 CopySendChar(cstate, '\\');
3929                                 start = ptr++;  /* we include char in next run */
3930                         }
3931                         else if (IS_HIGHBIT_SET(c))
3932                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
3933                         else
3934                                 ptr++;
3935                 }
3936         }
3937         else
3938         {
3939                 start = ptr;
3940                 while ((c = *ptr) != '\0')
3941                 {
3942                         if ((unsigned char) c < (unsigned char) 0x20)
3943                         {
3944                                 /*
3945                                  * \r and \n must be escaped, the others are traditional. We
3946                                  * prefer to dump these using the C-like notation, rather than
3947                                  * a backslash and the literal character, because it makes the
3948                                  * dump file a bit more proof against Microsoftish data
3949                                  * mangling.
3950                                  */
3951                                 switch (c)
3952                                 {
3953                                         case '\b':
3954                                                 c = 'b';
3955                                                 break;
3956                                         case '\f':
3957                                                 c = 'f';
3958                                                 break;
3959                                         case '\n':
3960                                                 c = 'n';
3961                                                 break;
3962                                         case '\r':
3963                                                 c = 'r';
3964                                                 break;
3965                                         case '\t':
3966                                                 c = 't';
3967                                                 break;
3968                                         case '\v':
3969                                                 c = 'v';
3970                                                 break;
3971                                         default:
3972                                                 /* If it's the delimiter, must backslash it */
3973                                                 if (c == delimc)
3974                                                         break;
3975                                                 /* All ASCII control chars are length 1 */
3976                                                 ptr++;
3977                                                 continue;               /* fall to end of loop */
3978                                 }
3979                                 /* if we get here, we need to convert the control char */
3980                                 DUMPSOFAR();
3981                                 CopySendChar(cstate, '\\');
3982                                 CopySendChar(cstate, c);
3983                                 start = ++ptr;  /* do not include char in next run */
3984                         }
3985                         else if (c == '\\' || c == delimc)
3986                         {
3987                                 DUMPSOFAR();
3988                                 CopySendChar(cstate, '\\');
3989                                 start = ptr++;  /* we include char in next run */
3990                         }
3991                         else
3992                                 ptr++;
3993                 }
3994         }
3995
3996         DUMPSOFAR();
3997 }
3998
3999 /*
4000  * Send text representation of one attribute, with conversion and
4001  * CSV-style escaping
4002  */
4003 static void
4004 CopyAttributeOutCSV(CopyState cstate, char *string,
4005                                         bool use_quote, bool single_attr)
4006 {
4007         char       *ptr;
4008         char       *start;
4009         char            c;
4010         char            delimc = cstate->delim[0];
4011         char            quotec = cstate->quote[0];
4012         char            escapec = cstate->escape[0];
4013
4014         /* force quoting if it matches null_print (before conversion!) */
4015         if (!use_quote && strcmp(string, cstate->null_print) == 0)
4016                 use_quote = true;
4017
4018         if (cstate->need_transcoding)
4019                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
4020         else
4021                 ptr = string;
4022
4023         /*
4024          * Make a preliminary pass to discover if it needs quoting
4025          */
4026         if (!use_quote)
4027         {
4028                 /*
4029                  * Because '\.' can be a data value, quote it if it appears alone on a
4030                  * line so it is not interpreted as the end-of-data marker.
4031                  */
4032                 if (single_attr && strcmp(ptr, "\\.") == 0)
4033                         use_quote = true;
4034                 else
4035                 {
4036                         char       *tptr = ptr;
4037
4038                         while ((c = *tptr) != '\0')
4039                         {
4040                                 if (c == delimc || c == quotec || c == '\n' || c == '\r')
4041                                 {
4042                                         use_quote = true;
4043                                         break;
4044                                 }
4045                                 if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4046                                         tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
4047                                 else
4048                                         tptr++;
4049                         }
4050                 }
4051         }
4052
4053         if (use_quote)
4054         {
4055                 CopySendChar(cstate, quotec);
4056
4057                 /*
4058                  * We adopt the same optimization strategy as in CopyAttributeOutText
4059                  */
4060                 start = ptr;
4061                 while ((c = *ptr) != '\0')
4062                 {
4063                         if (c == quotec || c == escapec)
4064                         {
4065                                 DUMPSOFAR();
4066                                 CopySendChar(cstate, escapec);
4067                                 start = ptr;    /* we include char in next run */
4068                         }
4069                         if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4070                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
4071                         else
4072                                 ptr++;
4073                 }
4074                 DUMPSOFAR();
4075
4076                 CopySendChar(cstate, quotec);
4077         }
4078         else
4079         {
4080                 /* If it doesn't need quoting, we can just dump it as-is */
4081                 CopySendString(cstate, ptr);
4082         }
4083 }
4084
4085 /*
4086  * CopyGetAttnums - build an integer list of attnums to be copied
4087  *
4088  * The input attnamelist is either the user-specified column list,
4089  * or NIL if there was none (in which case we want all the non-dropped
4090  * columns).
4091  *
4092  * rel can be NULL ... it's only used for error reports.
4093  */
4094 static List *
4095 CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
4096 {
4097         List       *attnums = NIL;
4098
4099         if (attnamelist == NIL)
4100         {
4101                 /* Generate default column list */
4102                 Form_pg_attribute *attr = tupDesc->attrs;
4103                 int                     attr_count = tupDesc->natts;
4104                 int                     i;
4105
4106                 for (i = 0; i < attr_count; i++)
4107                 {
4108                         if (attr[i]->attisdropped)
4109                                 continue;
4110                         attnums = lappend_int(attnums, i + 1);
4111                 }
4112         }
4113         else
4114         {
4115                 /* Validate the user-supplied list and extract attnums */
4116                 ListCell   *l;
4117
4118                 foreach(l, attnamelist)
4119                 {
4120                         char       *name = strVal(lfirst(l));
4121                         int                     attnum;
4122                         int                     i;
4123
4124                         /* Lookup column name */
4125                         attnum = InvalidAttrNumber;
4126                         for (i = 0; i < tupDesc->natts; i++)
4127                         {
4128                                 if (tupDesc->attrs[i]->attisdropped)
4129                                         continue;
4130                                 if (namestrcmp(&(tupDesc->attrs[i]->attname), name) == 0)
4131                                 {
4132                                         attnum = tupDesc->attrs[i]->attnum;
4133                                         break;
4134                                 }
4135                         }
4136                         if (attnum == InvalidAttrNumber)
4137                         {
4138                                 if (rel != NULL)
4139                                         ereport(ERROR,
4140                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4141                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
4142                                                    name, RelationGetRelationName(rel))));
4143                                 else
4144                                         ereport(ERROR,
4145                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4146                                                          errmsg("column \"%s\" does not exist",
4147                                                                         name)));
4148                         }
4149                         /* Check for duplicates */
4150                         if (list_member_int(attnums, attnum))
4151                                 ereport(ERROR,
4152                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
4153                                                  errmsg("column \"%s\" specified more than once",
4154                                                                 name)));
4155                         attnums = lappend_int(attnums, attnum);
4156                 }
4157         }
4158
4159         return attnums;
4160 }
4161
4162
4163 /*
4164  * copy_dest_startup --- executor startup
4165  */
4166 static void
4167 copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
4168 {
4169         /* no-op */
4170 }
4171
4172 /*
4173  * copy_dest_receive --- receive one tuple
4174  */
4175 static void
4176 copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
4177 {
4178         DR_copy    *myState = (DR_copy *) self;
4179         CopyState       cstate = myState->cstate;
4180
4181         /* Make sure the tuple is fully deconstructed */
4182         slot_getallattrs(slot);
4183
4184         /* And send the data */
4185         CopyOneRowTo(cstate, InvalidOid, slot->tts_values, slot->tts_isnull);
4186         myState->processed++;
4187 }
4188
4189 /*
4190  * copy_dest_shutdown --- executor end
4191  */
4192 static void
4193 copy_dest_shutdown(DestReceiver *self)
4194 {
4195         /* no-op */
4196 }
4197
4198 /*
4199  * copy_dest_destroy --- release DestReceiver object
4200  */
4201 static void
4202 copy_dest_destroy(DestReceiver *self)
4203 {
4204         pfree(self);
4205 }
4206
4207 /*
4208  * CreateCopyDestReceiver -- create a suitable DestReceiver object
4209  */
4210 DestReceiver *
4211 CreateCopyDestReceiver(void)
4212 {
4213         DR_copy    *self = (DR_copy *) palloc(sizeof(DR_copy));
4214
4215         self->pub.receiveSlot = copy_dest_receive;
4216         self->pub.rStartup = copy_dest_startup;
4217         self->pub.rShutdown = copy_dest_shutdown;
4218         self->pub.rDestroy = copy_dest_destroy;
4219         self->pub.mydest = DestCopyOut;
4220
4221         self->cstate = NULL;            /* will be set later */
4222         self->processed = 0;
4223
4224         return (DestReceiver *) self;
4225 }