]> granicus.if.org Git - postgresql/blob - src/backend/commands/copy.c
pgindent run for 9.4
[postgresql] / src / backend / commands / copy.c
1 /*-------------------------------------------------------------------------
2  *
3  * copy.c
4  *              Implements the COPY utility command
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/commands/copy.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <ctype.h>
18 #include <unistd.h>
19 #include <sys/stat.h>
20 #include <netinet/in.h>
21 #include <arpa/inet.h>
22
23 #include "access/heapam.h"
24 #include "access/htup_details.h"
25 #include "access/sysattr.h"
26 #include "access/xact.h"
27 #include "catalog/namespace.h"
28 #include "catalog/pg_type.h"
29 #include "commands/copy.h"
30 #include "commands/defrem.h"
31 #include "commands/trigger.h"
32 #include "executor/executor.h"
33 #include "libpq/libpq.h"
34 #include "libpq/pqformat.h"
35 #include "mb/pg_wchar.h"
36 #include "miscadmin.h"
37 #include "optimizer/clauses.h"
38 #include "optimizer/planner.h"
39 #include "parser/parse_relation.h"
40 #include "rewrite/rewriteHandler.h"
41 #include "storage/fd.h"
42 #include "tcop/tcopprot.h"
43 #include "utils/acl.h"
44 #include "utils/builtins.h"
45 #include "utils/lsyscache.h"
46 #include "utils/memutils.h"
47 #include "utils/portal.h"
48 #include "utils/rel.h"
49 #include "utils/snapmgr.h"
50
51
/* Nonzero when c is one of the octal digit characters '0' through '7'. */
#define ISOCTAL(c) ((c) >= '0' && (c) <= '7')
/* Numeric value of the digit character c (its offset from '0'). */
#define OCTVALUE(c) ((c) - '0')
54
/*
 * The kinds of data source/destination that the lowest-level COPY I/O
 * routines have to distinguish.
 */
typedef enum CopyDest
{
	COPY_FILE,					/* a file, or a program reached via a pipe */
	COPY_OLD_FE,				/* the frontend, speaking protocol 2.0 */
	COPY_NEW_FE					/* the frontend, speaking protocol 3.0 */
} CopyDest;
65
/*
 * Line-terminator convention of the input data.
 */
typedef enum EolType
{
	EOL_UNKNOWN,				/* not determined */
	EOL_NL,						/* newline */
	EOL_CR,						/* carriage return */
	EOL_CRNL					/* carriage return followed by newline */
} EolType;
76
/*
 * This struct contains all the state variables used throughout a COPY
 * operation. For simplicity, we use the same struct for all variants of COPY,
 * even though some fields are used in only some cases.
 *
 * The fields are grouped as: low-level I/O state, parameters taken from the
 * COPY command, error-reporting context, and working state (shared, COPY TO
 * only, COPY FROM only, and the three text-input buffers).
 *
 * Multi-byte encodings: all supported client-side encodings encode multi-byte
 * characters by having the first byte's high bit set. Subsequent bytes of the
 * character can have the high bit not set. When scanning data in such an
 * encoding to look for a match to a single-byte (ie ASCII) character, we must
 * use the full pg_encoding_mblen() machinery to skip over multibyte
 * characters, else we might find a false match to a trailing byte. In
 * supported server encodings, there is no possibility of a false match, and
 * it's faster to make useless comparisons to trailing bytes than it is to
 * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
 * when we have to do it the hard way.
 */
typedef struct CopyStateData
{
	/* low-level state data */
	CopyDest	copy_dest;		/* type of copy source/destination */
	FILE	   *copy_file;		/* used if copy_dest == COPY_FILE */
	StringInfo	fe_msgbuf;		/* used for all dests during COPY TO, only for
								 * dest == COPY_NEW_FE in COPY FROM */
	bool		fe_eof;			/* true if detected end of copy data */
	EolType		eol_type;		/* EOL type of input */
	int			file_encoding;	/* file or remote side's character encoding */
	bool		need_transcoding;		/* file encoding diff from server? */
	bool		encoding_embeds_ascii;	/* ASCII can be non-first byte? */

	/* parameters from the COPY command */
	Relation	rel;			/* relation to copy to or from */
	QueryDesc  *queryDesc;		/* executable query to copy from */
	List	   *attnumlist;		/* integer list of attnums to copy */
	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
	bool		is_program;		/* is 'filename' a program to popen? */
	bool		binary;			/* binary format? */
	bool		oids;			/* include OIDs? */
	bool		freeze;			/* freeze rows on loading? */
	bool		csv_mode;		/* Comma Separated Value format? */
	bool		header_line;	/* CSV header line? */
	char	   *null_print;		/* NULL marker string (server encoding!) */
	int			null_print_len; /* length of same */
	char	   *null_print_client;		/* same converted to file encoding */
	char	   *delim;			/* column delimiter (must be 1 byte) */
	char	   *quote;			/* CSV quote char (must be 1 byte) */
	char	   *escape;			/* CSV escape char (must be 1 byte) */
	List	   *force_quote;	/* list of column names */
	bool		force_quote_all;	/* FORCE QUOTE *? */
	bool	   *force_quote_flags;		/* per-column CSV FQ flags */
	List	   *force_notnull;	/* list of column names */
	bool	   *force_notnull_flags;	/* per-column CSV FNN flags */
	List	   *force_null;		/* list of column names */
	bool	   *force_null_flags;		/* per-column CSV FN flags */
	bool		convert_selectively;	/* do selective binary conversion? */
	List	   *convert_select; /* list of column names (can be NIL) */
	bool	   *convert_select_flags;	/* per-column CSV/TEXT CS flags */

	/* these are just for error messages, see CopyFromErrorCallback */
	const char *cur_relname;	/* table name for error messages */
	int			cur_lineno;		/* line number for error messages */
	const char *cur_attname;	/* current att for error messages */
	const char *cur_attval;		/* current att value for error messages */

	/*
	 * Working state for COPY TO/FROM
	 */
	MemoryContext copycontext;	/* per-copy execution context */

	/*
	 * Working state for COPY TO
	 */
	FmgrInfo   *out_functions;	/* lookup info for output functions */
	MemoryContext rowcontext;	/* per-row evaluation context */

	/*
	 * Working state for COPY FROM
	 *
	 * NOTE(review): the first four fields below carry no comments in the
	 * original; the descriptions are inferred from their names and types and
	 * should be confirmed against the code that sets them.
	 */
	AttrNumber	num_defaults;	/* presumably the number of entries in
								 * defmap/defexprs -- TODO confirm */
	bool		file_has_oids;	/* presumably: input data carries OIDs? --
								 * TODO confirm */
	FmgrInfo	oid_in_function;	/* presumably input function lookup info
									 * for OID values -- TODO confirm */
	Oid			oid_typioparam; /* presumably typioparam paired with
								 * oid_in_function -- TODO confirm */
	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
	Oid		   *typioparams;	/* array of element types for in_functions */
	int		   *defmap;			/* array of default att numbers */
	ExprState **defexprs;		/* array of default att expressions */
	bool		volatile_defexprs;		/* is any of defexprs volatile? */

	/*
	 * These variables are used to reduce overhead in textual COPY FROM.
	 *
	 * attribute_buf holds the separated, de-escaped text for each field of
	 * the current line.  The CopyReadAttributes functions return arrays of
	 * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
	 * the buffer on each cycle.
	 */
	StringInfoData attribute_buf;

	/*
	 * field raw data pointers found by COPY FROM
	 *
	 * NOTE(review): max_fields is presumably the allocated length of the
	 * raw_fields array -- TODO confirm.
	 */

	int			max_fields;
	char	  **raw_fields;

	/*
	 * Similarly, line_buf holds the whole input line being processed. The
	 * input cycle is first to read the whole line into line_buf, convert it
	 * to server encoding there, and then extract the individual attribute
	 * fields into attribute_buf.  line_buf is preserved unmodified so that we
	 * can display it in error messages if appropriate.
	 */
	StringInfoData line_buf;
	bool		line_buf_converted;		/* converted to server encoding? */
	bool		line_buf_valid; /* contains the row being processed? */

	/*
	 * Finally, raw_buf holds raw data read from the data source (file or
	 * client connection).  CopyReadLine parses this data sufficiently to
	 * locate line boundaries, then transfers the data to line_buf and
	 * converts it.  Note: we guarantee that there is a \0 at
	 * raw_buf[raw_buf_len].
	 */
#define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
	char	   *raw_buf;
	int			raw_buf_index;	/* next byte to process */
	int			raw_buf_len;	/* total # of bytes stored */
} CopyStateData;
202
203 /* DestReceiver for COPY (SELECT) TO */
204 typedef struct
205 {
206         DestReceiver pub;                       /* publicly-known function pointers */
207         CopyState       cstate;                 /* CopyStateData for the command */
208         uint64          processed;              /* # of tuples processed */
209 } DR_copy;
210
211
212 /*
213  * These macros centralize code used to process line_buf and raw_buf buffers.
214  * They are macros because they often do continue/break control and to avoid
215  * function call overhead in tight COPY loops.
216  *
217  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
218  * prevent the continue/break processing from working.  We end the "if (1)"
219  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
220  * any "else" in the calling code, and to avoid any compiler warnings about
221  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
222  */
223
/*
 * This keeps the character read at the top of the loop in the buffer
 * even if there is more than one read-ahead.
 *
 * If position raw_buf_ptr + extralen lies at or beyond copy_buf_len and
 * hit_eof is false, the macro resets raw_buf_ptr to prev_raw_ptr (undoing
 * the fetch), sets need_data, and executes "continue" in the enclosing loop.
 * Otherwise it does nothing.
 *
 * It expands in the caller's scope and uses the caller's variables
 * raw_buf_ptr, prev_raw_ptr, copy_buf_len, hit_eof and need_data; it must be
 * used inside a loop.
 */
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if (1) \
{ \
	if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
	{ \
		raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
		need_data = true; \
		continue; \
	} \
} else ((void) 0)
238
/*
 * This consumes the remainder of the buffer and breaks.
 *
 * If position raw_buf_ptr + extralen lies at or beyond copy_buf_len and
 * hit_eof is true, the macro sets result to true and executes "break"; when
 * extralen is nonzero it first advances raw_buf_ptr to copy_buf_len.
 * Otherwise it does nothing.
 *
 * It expands in the caller's scope and uses the caller's variables
 * raw_buf_ptr, copy_buf_len, hit_eof and result; it must be used where
 * "break" is meaningful.
 */
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if (1) \
{ \
	if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
	{ \
		if (extralen) \
			raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
		/* backslash just before EOF, treat as data char */ \
		result = true; \
		break; \
	} \
} else ((void) 0)
252
/*
 * Transfer any approved data to line_buf; must do this to be sure
 * there is some room in raw_buf.
 *
 * When raw_buf_ptr is ahead of cstate->raw_buf_index, the bytes of
 * cstate->raw_buf between those two positions are appended to
 * cstate->line_buf and cstate->raw_buf_index is advanced to raw_buf_ptr.
 * Otherwise nothing happens.
 *
 * It expands in the caller's scope and uses the caller's variables cstate
 * and raw_buf_ptr.
 */
#define REFILL_LINEBUF \
if (1) \
{ \
	if (raw_buf_ptr > cstate->raw_buf_index) \
	{ \
		appendBinaryStringInfo(&cstate->line_buf, \
							 cstate->raw_buf + cstate->raw_buf_index, \
							   raw_buf_ptr - cstate->raw_buf_index); \
		cstate->raw_buf_index = raw_buf_ptr; \
	} \
} else ((void) 0)
268
/*
 * Undo any read-ahead and jump out of the block.
 *
 * Sets raw_buf_ptr to one past prev_raw_ptr, then jumps to the label
 * not_end_of_copy.  It expands in the caller's scope: raw_buf_ptr and
 * prev_raw_ptr are the caller's variables, and the label must exist in the
 * calling function.
 */
#define NO_END_OF_COPY_GOTO \
if (1) \
{ \
	raw_buf_ptr = prev_raw_ptr + 1; \
	goto not_end_of_copy; \
} else ((void) 0)
276
/*
 * Fixed 11-byte signature: "PGCOPY", newline, octal 377, carriage return,
 * newline, and a zero byte.  Spelled out per byte so that the zero byte is
 * visibly part of the data rather than a string terminator.
 */
static const char BinarySignature[11] = {
	'P', 'G', 'C', 'O', 'P', 'Y', '\n', '\377', '\r', '\n', '\0'
};
278
279
280 /* non-export function prototypes */
281 static CopyState BeginCopy(bool is_from, Relation rel, Node *raw_query,
282                   const char *queryString, List *attnamelist, List *options);
283 static void EndCopy(CopyState cstate);
284 static void ClosePipeToProgram(CopyState cstate);
285 static CopyState BeginCopyTo(Relation rel, Node *query, const char *queryString,
286                         const char *filename, bool is_program, List *attnamelist,
287                         List *options);
288 static void EndCopyTo(CopyState cstate);
289 static uint64 DoCopyTo(CopyState cstate);
290 static uint64 CopyTo(CopyState cstate);
291 static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
292                          Datum *values, bool *nulls);
293 static uint64 CopyFrom(CopyState cstate);
294 static void CopyFromInsertBatch(CopyState cstate, EState *estate,
295                                         CommandId mycid, int hi_options,
296                                         ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
297                                         BulkInsertState bistate,
298                                         int nBufferedTuples, HeapTuple *bufferedTuples,
299                                         int firstBufferedLineNo);
300 static bool CopyReadLine(CopyState cstate);
301 static bool CopyReadLineText(CopyState cstate);
302 static int      CopyReadAttributesText(CopyState cstate);
303 static int      CopyReadAttributesCSV(CopyState cstate);
304 static Datum CopyReadBinaryAttribute(CopyState cstate,
305                                                 int column_no, FmgrInfo *flinfo,
306                                                 Oid typioparam, int32 typmod,
307                                                 bool *isnull);
308 static void CopyAttributeOutText(CopyState cstate, char *string);
309 static void CopyAttributeOutCSV(CopyState cstate, char *string,
310                                         bool use_quote, bool single_attr);
311 static List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
312                            List *attnamelist);
313 static char *limit_printout_length(const char *str);
314
315 /* Low-level communications functions */
316 static void SendCopyBegin(CopyState cstate);
317 static void ReceiveCopyBegin(CopyState cstate);
318 static void SendCopyEnd(CopyState cstate);
319 static void CopySendData(CopyState cstate, const void *databuf, int datasize);
320 static void CopySendString(CopyState cstate, const char *str);
321 static void CopySendChar(CopyState cstate, char c);
322 static void CopySendEndOfRow(CopyState cstate);
323 static int CopyGetData(CopyState cstate, void *databuf,
324                         int minread, int maxread);
325 static void CopySendInt32(CopyState cstate, int32 val);
326 static bool CopyGetInt32(CopyState cstate, int32 *val);
327 static void CopySendInt16(CopyState cstate, int16 val);
328 static bool CopyGetInt16(CopyState cstate, int16 *val);
329
330
331 /*
332  * Send copy start/stop messages for frontend copies.  These have changed
333  * in past protocol redesigns.
334  */
335 static void
336 SendCopyBegin(CopyState cstate)
337 {
338         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
339         {
340                 /* new way */
341                 StringInfoData buf;
342                 int                     natts = list_length(cstate->attnumlist);
343                 int16           format = (cstate->binary ? 1 : 0);
344                 int                     i;
345
346                 pq_beginmessage(&buf, 'H');
347                 pq_sendbyte(&buf, format);              /* overall format */
348                 pq_sendint(&buf, natts, 2);
349                 for (i = 0; i < natts; i++)
350                         pq_sendint(&buf, format, 2);            /* per-column formats */
351                 pq_endmessage(&buf);
352                 cstate->copy_dest = COPY_NEW_FE;
353         }
354         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
355         {
356                 /* old way */
357                 if (cstate->binary)
358                         ereport(ERROR,
359                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
360                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
361                 pq_putemptymessage('H');
362                 /* grottiness needed for old COPY OUT protocol */
363                 pq_startcopyout();
364                 cstate->copy_dest = COPY_OLD_FE;
365         }
366         else
367         {
368                 /* very old way */
369                 if (cstate->binary)
370                         ereport(ERROR,
371                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
372                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
373                 pq_putemptymessage('B');
374                 /* grottiness needed for old COPY OUT protocol */
375                 pq_startcopyout();
376                 cstate->copy_dest = COPY_OLD_FE;
377         }
378 }
379
380 static void
381 ReceiveCopyBegin(CopyState cstate)
382 {
383         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
384         {
385                 /* new way */
386                 StringInfoData buf;
387                 int                     natts = list_length(cstate->attnumlist);
388                 int16           format = (cstate->binary ? 1 : 0);
389                 int                     i;
390
391                 pq_beginmessage(&buf, 'G');
392                 pq_sendbyte(&buf, format);              /* overall format */
393                 pq_sendint(&buf, natts, 2);
394                 for (i = 0; i < natts; i++)
395                         pq_sendint(&buf, format, 2);            /* per-column formats */
396                 pq_endmessage(&buf);
397                 cstate->copy_dest = COPY_NEW_FE;
398                 cstate->fe_msgbuf = makeStringInfo();
399         }
400         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
401         {
402                 /* old way */
403                 if (cstate->binary)
404                         ereport(ERROR,
405                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
406                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
407                 pq_putemptymessage('G');
408                 cstate->copy_dest = COPY_OLD_FE;
409         }
410         else
411         {
412                 /* very old way */
413                 if (cstate->binary)
414                         ereport(ERROR,
415                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
416                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
417                 pq_putemptymessage('D');
418                 cstate->copy_dest = COPY_OLD_FE;
419         }
420         /* We *must* flush here to ensure FE knows it can send. */
421         pq_flush();
422 }
423
424 static void
425 SendCopyEnd(CopyState cstate)
426 {
427         if (cstate->copy_dest == COPY_NEW_FE)
428         {
429                 /* Shouldn't have any unsent data */
430                 Assert(cstate->fe_msgbuf->len == 0);
431                 /* Send Copy Done message */
432                 pq_putemptymessage('c');
433         }
434         else
435         {
436                 CopySendData(cstate, "\\.", 2);
437                 /* Need to flush out the trailer (this also appends a newline) */
438                 CopySendEndOfRow(cstate);
439                 pq_endcopyout(false);
440         }
441 }
442
443 /*----------
444  * CopySendData sends output data to the destination (file or frontend)
445  * CopySendString does the same for null-terminated strings
446  * CopySendChar does the same for single characters
447  * CopySendEndOfRow does the appropriate thing at end of each data row
448  *      (data is not actually flushed except by CopySendEndOfRow)
449  *
450  * NB: no data conversion is applied by these functions
451  *----------
452  */
/*
 * Append datasize bytes starting at databuf to cstate->fe_msgbuf.
 * The bytes are only buffered here; nothing is written out by this function.
 */
static void
CopySendData(CopyState cstate, const void *databuf, int datasize)
{
	appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
}
458
459 static void
460 CopySendString(CopyState cstate, const char *str)
461 {
462         appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
463 }
464
/*
 * Append the single character c to cstate->fe_msgbuf.
 * The character is only buffered here; nothing is written out.
 */
static void
CopySendChar(CopyState cstate, char c)
{
	appendStringInfoCharMacro(cstate->fe_msgbuf, c);
}
470
471 static void
472 CopySendEndOfRow(CopyState cstate)
473 {
474         StringInfo      fe_msgbuf = cstate->fe_msgbuf;
475
476         switch (cstate->copy_dest)
477         {
478                 case COPY_FILE:
479                         if (!cstate->binary)
480                         {
481                                 /* Default line termination depends on platform */
482 #ifndef WIN32
483                                 CopySendChar(cstate, '\n');
484 #else
485                                 CopySendString(cstate, "\r\n");
486 #endif
487                         }
488
489                         if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
490                                            cstate->copy_file) != 1 ||
491                                 ferror(cstate->copy_file))
492                         {
493                                 if (cstate->is_program)
494                                 {
495                                         if (errno == EPIPE)
496                                         {
497                                                 /*
498                                                  * The pipe will be closed automatically on error at
499                                                  * the end of transaction, but we might get a better
500                                                  * error message from the subprocess' exit code than
501                                                  * just "Broken Pipe"
502                                                  */
503                                                 ClosePipeToProgram(cstate);
504
505                                                 /*
506                                                  * If ClosePipeToProgram() didn't throw an error, the
507                                                  * program terminated normally, but closed the pipe
508                                                  * first. Restore errno, and throw an error.
509                                                  */
510                                                 errno = EPIPE;
511                                         }
512                                         ereport(ERROR,
513                                                         (errcode_for_file_access(),
514                                                          errmsg("could not write to COPY program: %m")));
515                                 }
516                                 else
517                                         ereport(ERROR,
518                                                         (errcode_for_file_access(),
519                                                          errmsg("could not write to COPY file: %m")));
520                         }
521                         break;
522                 case COPY_OLD_FE:
523                         /* The FE/BE protocol uses \n as newline for all platforms */
524                         if (!cstate->binary)
525                                 CopySendChar(cstate, '\n');
526
527                         if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
528                         {
529                                 /* no hope of recovering connection sync, so FATAL */
530                                 ereport(FATAL,
531                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
532                                                  errmsg("connection lost during COPY to stdout")));
533                         }
534                         break;
535                 case COPY_NEW_FE:
536                         /* The FE/BE protocol uses \n as newline for all platforms */
537                         if (!cstate->binary)
538                                 CopySendChar(cstate, '\n');
539
540                         /* Dump the accumulated row as one CopyData message */
541                         (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
542                         break;
543         }
544
545         resetStringInfo(fe_msgbuf);
546 }
547
548 /*
549  * CopyGetData reads data from the source (file or frontend)
550  *
551  * We attempt to read at least minread, and at most maxread, bytes from
552  * the source.  The actual number of bytes read is returned; if this is
553  * less than minread, EOF was detected.
554  *
555  * Note: when copying from the frontend, we expect a proper EOF mark per
556  * protocol; if the frontend simply drops the connection, we raise error.
557  * It seems unwise to allow the COPY IN to complete normally in that case.
558  *
559  * NB: no data conversion is applied here.
560  */
561 static int
562 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
563 {
564         int                     bytesread = 0;
565
566         switch (cstate->copy_dest)
567         {
568                 case COPY_FILE:
569                         bytesread = fread(databuf, 1, maxread, cstate->copy_file);
570                         if (ferror(cstate->copy_file))
571                                 ereport(ERROR,
572                                                 (errcode_for_file_access(),
573                                                  errmsg("could not read from COPY file: %m")));
574                         break;
575                 case COPY_OLD_FE:
576
577                         /*
578                          * We cannot read more than minread bytes (which in practice is 1)
579                          * because old protocol doesn't have any clear way of separating
580                          * the COPY stream from following data.  This is slow, but not any
581                          * slower than the code path was originally, and we don't care
582                          * much anymore about the performance of old protocol.
583                          */
584                         if (pq_getbytes((char *) databuf, minread))
585                         {
586                                 /* Only a \. terminator is legal EOF in old protocol */
587                                 ereport(ERROR,
588                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
589                                                  errmsg("unexpected EOF on client connection with an open transaction")));
590                         }
591                         bytesread = minread;
592                         break;
593                 case COPY_NEW_FE:
594                         while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
595                         {
596                                 int                     avail;
597
598                                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
599                                 {
600                                         /* Try to receive another message */
601                                         int                     mtype;
602
603                         readmessage:
604                                         mtype = pq_getbyte();
605                                         if (mtype == EOF)
606                                                 ereport(ERROR,
607                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
608                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
609                                         if (pq_getmessage(cstate->fe_msgbuf, 0))
610                                                 ereport(ERROR,
611                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
612                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
613                                         switch (mtype)
614                                         {
615                                                 case 'd':               /* CopyData */
616                                                         break;
617                                                 case 'c':               /* CopyDone */
618                                                         /* COPY IN correctly terminated by frontend */
619                                                         cstate->fe_eof = true;
620                                                         return bytesread;
621                                                 case 'f':               /* CopyFail */
622                                                         ereport(ERROR,
623                                                                         (errcode(ERRCODE_QUERY_CANCELED),
624                                                                          errmsg("COPY from stdin failed: %s",
625                                                                            pq_getmsgstring(cstate->fe_msgbuf))));
626                                                         break;
627                                                 case 'H':               /* Flush */
628                                                 case 'S':               /* Sync */
629
630                                                         /*
631                                                          * Ignore Flush/Sync for the convenience of client
632                                                          * libraries (such as libpq) that may send those
633                                                          * without noticing that the command they just
634                                                          * sent was COPY.
635                                                          */
636                                                         goto readmessage;
637                                                 default:
638                                                         ereport(ERROR,
639                                                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
640                                                                          errmsg("unexpected message type 0x%02X during COPY from stdin",
641                                                                                         mtype)));
642                                                         break;
643                                         }
644                                 }
645                                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
646                                 if (avail > maxread)
647                                         avail = maxread;
648                                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
649                                 databuf = (void *) ((char *) databuf + avail);
650                                 maxread -= avail;
651                                 bytesread += avail;
652                         }
653                         break;
654         }
655
656         return bytesread;
657 }
658
659
/*
 * These functions do apply some data conversion: they translate integers
 * between host byte order and network byte order.
 */
663
664 /*
665  * CopySendInt32 sends an int32 in network byte order
666  */
667 static void
668 CopySendInt32(CopyState cstate, int32 val)
669 {
670         uint32          buf;
671
672         buf = htonl((uint32) val);
673         CopySendData(cstate, &buf, sizeof(buf));
674 }
675
676 /*
677  * CopyGetInt32 reads an int32 that appears in network byte order
678  *
679  * Returns true if OK, false if EOF
680  */
681 static bool
682 CopyGetInt32(CopyState cstate, int32 *val)
683 {
684         uint32          buf;
685
686         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
687         {
688                 *val = 0;                               /* suppress compiler warning */
689                 return false;
690         }
691         *val = (int32) ntohl(buf);
692         return true;
693 }
694
695 /*
696  * CopySendInt16 sends an int16 in network byte order
697  */
698 static void
699 CopySendInt16(CopyState cstate, int16 val)
700 {
701         uint16          buf;
702
703         buf = htons((uint16) val);
704         CopySendData(cstate, &buf, sizeof(buf));
705 }
706
707 /*
708  * CopyGetInt16 reads an int16 that appears in network byte order
709  */
710 static bool
711 CopyGetInt16(CopyState cstate, int16 *val)
712 {
713         uint16          buf;
714
715         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
716         {
717                 *val = 0;                               /* suppress compiler warning */
718                 return false;
719         }
720         *val = (int16) ntohs(buf);
721         return true;
722 }
723
724
725 /*
726  * CopyLoadRawBuf loads some more data into raw_buf
727  *
728  * Returns TRUE if able to obtain at least one more byte, else FALSE.
729  *
730  * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
731  * down to the start of the buffer and then we load more data after that.
732  * This case is used only when a frontend multibyte character crosses a
733  * bufferload boundary.
734  */
735 static bool
736 CopyLoadRawBuf(CopyState cstate)
737 {
738         int                     nbytes;
739         int                     inbytes;
740
741         if (cstate->raw_buf_index < cstate->raw_buf_len)
742         {
743                 /* Copy down the unprocessed data */
744                 nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
745                 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
746                                 nbytes);
747         }
748         else
749                 nbytes = 0;                             /* no data need be saved */
750
751         inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
752                                                   1, RAW_BUF_SIZE - nbytes);
753         nbytes += inbytes;
754         cstate->raw_buf[nbytes] = '\0';
755         cstate->raw_buf_index = 0;
756         cstate->raw_buf_len = nbytes;
757         return (inbytes > 0);
758 }
759
760
761 /*
762  *       DoCopy executes the SQL COPY statement
763  *
764  * Either unload or reload contents of table <relation>, depending on <from>.
765  * (<from> = TRUE means we are inserting into the table.)  In the "TO" case
766  * we also support copying the output of an arbitrary SELECT query.
767  *
768  * If <pipe> is false, transfer is between the table and the file named
769  * <filename>.  Otherwise, transfer is between the table and our regular
770  * input/output stream. The latter could be either stdin/stdout or a
771  * socket, depending on whether we're running under Postmaster control.
772  *
773  * Do not allow a Postgres user without superuser privilege to read from
774  * or write to a file.
775  *
776  * Do not allow the copy if user doesn't have proper permission to access
777  * the table or the specifically requested columns.
778  */
779 Oid
780 DoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed)
781 {
782         CopyState       cstate;
783         bool            is_from = stmt->is_from;
784         bool            pipe = (stmt->filename == NULL);
785         Relation        rel;
786         Oid                     relid;
787
788         /* Disallow COPY to/from file or program except to superusers. */
789         if (!pipe && !superuser())
790         {
791                 if (stmt->is_program)
792                         ereport(ERROR,
793                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
794                                          errmsg("must be superuser to COPY to or from an external program"),
795                                          errhint("Anyone can COPY to stdout or from stdin. "
796                                                    "psql's \\copy command also works for anyone.")));
797                 else
798                         ereport(ERROR,
799                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
800                                          errmsg("must be superuser to COPY to or from a file"),
801                                          errhint("Anyone can COPY to stdout or from stdin. "
802                                                    "psql's \\copy command also works for anyone.")));
803         }
804
805         if (stmt->relation)
806         {
807                 TupleDesc       tupDesc;
808                 AclMode         required_access = (is_from ? ACL_INSERT : ACL_SELECT);
809                 RangeTblEntry *rte;
810                 List       *attnums;
811                 ListCell   *cur;
812
813                 Assert(!stmt->query);
814
815                 /* Open and lock the relation, using the appropriate lock type. */
816                 rel = heap_openrv(stmt->relation,
817                                                   (is_from ? RowExclusiveLock : AccessShareLock));
818
819                 relid = RelationGetRelid(rel);
820
821                 rte = makeNode(RangeTblEntry);
822                 rte->rtekind = RTE_RELATION;
823                 rte->relid = RelationGetRelid(rel);
824                 rte->relkind = rel->rd_rel->relkind;
825                 rte->requiredPerms = required_access;
826
827                 tupDesc = RelationGetDescr(rel);
828                 attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
829                 foreach(cur, attnums)
830                 {
831                         int                     attno = lfirst_int(cur) -
832                         FirstLowInvalidHeapAttributeNumber;
833
834                         if (is_from)
835                                 rte->modifiedCols = bms_add_member(rte->modifiedCols, attno);
836                         else
837                                 rte->selectedCols = bms_add_member(rte->selectedCols, attno);
838                 }
839                 ExecCheckRTPerms(list_make1(rte), true);
840         }
841         else
842         {
843                 Assert(stmt->query);
844
845                 relid = InvalidOid;
846                 rel = NULL;
847         }
848
849         if (is_from)
850         {
851                 Assert(rel);
852
853                 /* check read-only transaction */
854                 if (XactReadOnly && !rel->rd_islocaltemp)
855                         PreventCommandIfReadOnly("COPY FROM");
856
857                 cstate = BeginCopyFrom(rel, stmt->filename, stmt->is_program,
858                                                            stmt->attlist, stmt->options);
859                 *processed = CopyFrom(cstate);  /* copy from file to database */
860                 EndCopyFrom(cstate);
861         }
862         else
863         {
864                 cstate = BeginCopyTo(rel, stmt->query, queryString,
865                                                          stmt->filename, stmt->is_program,
866                                                          stmt->attlist, stmt->options);
867                 *processed = DoCopyTo(cstate);  /* copy from database to file */
868                 EndCopyTo(cstate);
869         }
870
871         /*
872          * Close the relation. If reading, we can release the AccessShareLock we
873          * got; if writing, we should hold the lock until end of transaction to
874          * ensure that updates will be committed before lock is released.
875          */
876         if (rel != NULL)
877                 heap_close(rel, (is_from ? NoLock : AccessShareLock));
878
879         return relid;
880 }
881
882 /*
883  * Process the statement option list for COPY.
884  *
885  * Scan the options list (a list of DefElem) and transpose the information
886  * into cstate, applying appropriate error checking.
887  *
888  * cstate is assumed to be filled with zeroes initially.
889  *
890  * This is exported so that external users of the COPY API can sanity-check
891  * a list of options.  In that usage, cstate should be passed as NULL
892  * (since external users don't know sizeof(CopyStateData)) and the collected
893  * data is just leaked until CurrentMemoryContext is reset.
894  *
895  * Note that additional checking, such as whether column names listed in FORCE
896  * QUOTE actually exist, has to be applied later.  This just checks for
897  * self-consistency of the options list.
898  */
899 void
900 ProcessCopyOptions(CopyState cstate,
901                                    bool is_from,
902                                    List *options)
903 {
904         bool            format_specified = false;
905         ListCell   *option;
906
907         /* Support external use for option sanity checking */
908         if (cstate == NULL)
909                 cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
910
911         cstate->file_encoding = -1;
912
913         /* Extract options from the statement node tree */
914         foreach(option, options)
915         {
916                 DefElem    *defel = (DefElem *) lfirst(option);
917
918                 if (strcmp(defel->defname, "format") == 0)
919                 {
920                         char       *fmt = defGetString(defel);
921
922                         if (format_specified)
923                                 ereport(ERROR,
924                                                 (errcode(ERRCODE_SYNTAX_ERROR),
925                                                  errmsg("conflicting or redundant options")));
926                         format_specified = true;
927                         if (strcmp(fmt, "text") == 0)
928                                  /* default format */ ;
929                         else if (strcmp(fmt, "csv") == 0)
930                                 cstate->csv_mode = true;
931                         else if (strcmp(fmt, "binary") == 0)
932                                 cstate->binary = true;
933                         else
934                                 ereport(ERROR,
935                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
936                                                  errmsg("COPY format \"%s\" not recognized", fmt)));
937                 }
938                 else if (strcmp(defel->defname, "oids") == 0)
939                 {
940                         if (cstate->oids)
941                                 ereport(ERROR,
942                                                 (errcode(ERRCODE_SYNTAX_ERROR),
943                                                  errmsg("conflicting or redundant options")));
944                         cstate->oids = defGetBoolean(defel);
945                 }
946                 else if (strcmp(defel->defname, "freeze") == 0)
947                 {
948                         if (cstate->freeze)
949                                 ereport(ERROR,
950                                                 (errcode(ERRCODE_SYNTAX_ERROR),
951                                                  errmsg("conflicting or redundant options")));
952                         cstate->freeze = defGetBoolean(defel);
953                 }
954                 else if (strcmp(defel->defname, "delimiter") == 0)
955                 {
956                         if (cstate->delim)
957                                 ereport(ERROR,
958                                                 (errcode(ERRCODE_SYNTAX_ERROR),
959                                                  errmsg("conflicting or redundant options")));
960                         cstate->delim = defGetString(defel);
961                 }
962                 else if (strcmp(defel->defname, "null") == 0)
963                 {
964                         if (cstate->null_print)
965                                 ereport(ERROR,
966                                                 (errcode(ERRCODE_SYNTAX_ERROR),
967                                                  errmsg("conflicting or redundant options")));
968                         cstate->null_print = defGetString(defel);
969                 }
970                 else if (strcmp(defel->defname, "header") == 0)
971                 {
972                         if (cstate->header_line)
973                                 ereport(ERROR,
974                                                 (errcode(ERRCODE_SYNTAX_ERROR),
975                                                  errmsg("conflicting or redundant options")));
976                         cstate->header_line = defGetBoolean(defel);
977                 }
978                 else if (strcmp(defel->defname, "quote") == 0)
979                 {
980                         if (cstate->quote)
981                                 ereport(ERROR,
982                                                 (errcode(ERRCODE_SYNTAX_ERROR),
983                                                  errmsg("conflicting or redundant options")));
984                         cstate->quote = defGetString(defel);
985                 }
986                 else if (strcmp(defel->defname, "escape") == 0)
987                 {
988                         if (cstate->escape)
989                                 ereport(ERROR,
990                                                 (errcode(ERRCODE_SYNTAX_ERROR),
991                                                  errmsg("conflicting or redundant options")));
992                         cstate->escape = defGetString(defel);
993                 }
994                 else if (strcmp(defel->defname, "force_quote") == 0)
995                 {
996                         if (cstate->force_quote || cstate->force_quote_all)
997                                 ereport(ERROR,
998                                                 (errcode(ERRCODE_SYNTAX_ERROR),
999                                                  errmsg("conflicting or redundant options")));
1000                         if (defel->arg && IsA(defel->arg, A_Star))
1001                                 cstate->force_quote_all = true;
1002                         else if (defel->arg && IsA(defel->arg, List))
1003                                 cstate->force_quote = (List *) defel->arg;
1004                         else
1005                                 ereport(ERROR,
1006                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1007                                                  errmsg("argument to option \"%s\" must be a list of column names",
1008                                                                 defel->defname)));
1009                 }
1010                 else if (strcmp(defel->defname, "force_not_null") == 0)
1011                 {
1012                         if (cstate->force_notnull)
1013                                 ereport(ERROR,
1014                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1015                                                  errmsg("conflicting or redundant options")));
1016                         if (defel->arg && IsA(defel->arg, List))
1017                                 cstate->force_notnull = (List *) defel->arg;
1018                         else
1019                                 ereport(ERROR,
1020                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1021                                                  errmsg("argument to option \"%s\" must be a list of column names",
1022                                                                 defel->defname)));
1023                 }
1024                 else if (strcmp(defel->defname, "force_null") == 0)
1025                 {
1026                         if (cstate->force_null)
1027                                 ereport(ERROR,
1028                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1029                                                  errmsg("conflicting or redundant options")));
1030                         if (defel->arg && IsA(defel->arg, List))
1031                                 cstate->force_null = (List *) defel->arg;
1032                         else
1033                                 ereport(ERROR,
1034                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1035                                                  errmsg("argument to option \"%s\" must be a list of column names",
1036                                                                 defel->defname)));
1037                 }
1038                 else if (strcmp(defel->defname, "convert_selectively") == 0)
1039                 {
1040                         /*
1041                          * Undocumented, not-accessible-from-SQL option: convert only the
1042                          * named columns to binary form, storing the rest as NULLs. It's
1043                          * allowed for the column list to be NIL.
1044                          */
1045                         if (cstate->convert_selectively)
1046                                 ereport(ERROR,
1047                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1048                                                  errmsg("conflicting or redundant options")));
1049                         cstate->convert_selectively = true;
1050                         if (defel->arg == NULL || IsA(defel->arg, List))
1051                                 cstate->convert_select = (List *) defel->arg;
1052                         else
1053                                 ereport(ERROR,
1054                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1055                                                  errmsg("argument to option \"%s\" must be a list of column names",
1056                                                                 defel->defname)));
1057                 }
1058                 else if (strcmp(defel->defname, "encoding") == 0)
1059                 {
1060                         if (cstate->file_encoding >= 0)
1061                                 ereport(ERROR,
1062                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1063                                                  errmsg("conflicting or redundant options")));
1064                         cstate->file_encoding = pg_char_to_encoding(defGetString(defel));
1065                         if (cstate->file_encoding < 0)
1066                                 ereport(ERROR,
1067                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1068                                                  errmsg("argument to option \"%s\" must be a valid encoding name",
1069                                                                 defel->defname)));
1070                 }
1071                 else
1072                         ereport(ERROR,
1073                                         (errcode(ERRCODE_SYNTAX_ERROR),
1074                                          errmsg("option \"%s\" not recognized",
1075                                                         defel->defname)));
1076         }
1077
1078         /*
1079          * Check for incompatible options (must do these two before inserting
1080          * defaults)
1081          */
1082         if (cstate->binary && cstate->delim)
1083                 ereport(ERROR,
1084                                 (errcode(ERRCODE_SYNTAX_ERROR),
1085                                  errmsg("cannot specify DELIMITER in BINARY mode")));
1086
1087         if (cstate->binary && cstate->null_print)
1088                 ereport(ERROR,
1089                                 (errcode(ERRCODE_SYNTAX_ERROR),
1090                                  errmsg("cannot specify NULL in BINARY mode")));
1091
1092         /* Set defaults for omitted options */
1093         if (!cstate->delim)
1094                 cstate->delim = cstate->csv_mode ? "," : "\t";
1095
1096         if (!cstate->null_print)
1097                 cstate->null_print = cstate->csv_mode ? "" : "\\N";
1098         cstate->null_print_len = strlen(cstate->null_print);
1099
1100         if (cstate->csv_mode)
1101         {
1102                 if (!cstate->quote)
1103                         cstate->quote = "\"";
1104                 if (!cstate->escape)
1105                         cstate->escape = cstate->quote;
1106         }
1107
1108         /* Only single-byte delimiter strings are supported. */
1109         if (strlen(cstate->delim) != 1)
1110                 ereport(ERROR,
1111                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1112                           errmsg("COPY delimiter must be a single one-byte character")));
1113
1114         /* Disallow end-of-line characters */
1115         if (strchr(cstate->delim, '\r') != NULL ||
1116                 strchr(cstate->delim, '\n') != NULL)
1117                 ereport(ERROR,
1118                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1119                          errmsg("COPY delimiter cannot be newline or carriage return")));
1120
1121         if (strchr(cstate->null_print, '\r') != NULL ||
1122                 strchr(cstate->null_print, '\n') != NULL)
1123                 ereport(ERROR,
1124                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1125                                  errmsg("COPY null representation cannot use newline or carriage return")));
1126
1127         /*
1128          * Disallow unsafe delimiter characters in non-CSV mode.  We can't allow
1129          * backslash because it would be ambiguous.  We can't allow the other
1130          * cases because data characters matching the delimiter must be
1131          * backslashed, and certain backslash combinations are interpreted
1132          * non-literally by COPY IN.  Disallowing all lower case ASCII letters is
1133          * more than strictly necessary, but seems best for consistency and
1134          * future-proofing.  Likewise we disallow all digits though only octal
1135          * digits are actually dangerous.
1136          */
1137         if (!cstate->csv_mode &&
1138                 strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
1139                            cstate->delim[0]) != NULL)
1140                 ereport(ERROR,
1141                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1142                                  errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
1143
1144         /* Check header */
1145         if (!cstate->csv_mode && cstate->header_line)
1146                 ereport(ERROR,
1147                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1148                                  errmsg("COPY HEADER available only in CSV mode")));
1149
1150         /* Check quote */
1151         if (!cstate->csv_mode && cstate->quote != NULL)
1152                 ereport(ERROR,
1153                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1154                                  errmsg("COPY quote available only in CSV mode")));
1155
1156         if (cstate->csv_mode && strlen(cstate->quote) != 1)
1157                 ereport(ERROR,
1158                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1159                                  errmsg("COPY quote must be a single one-byte character")));
1160
1161         if (cstate->csv_mode && cstate->delim[0] == cstate->quote[0])
1162                 ereport(ERROR,
1163                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1164                                  errmsg("COPY delimiter and quote must be different")));
1165
1166         /* Check escape */
1167         if (!cstate->csv_mode && cstate->escape != NULL)
1168                 ereport(ERROR,
1169                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1170                                  errmsg("COPY escape available only in CSV mode")));
1171
1172         if (cstate->csv_mode && strlen(cstate->escape) != 1)
1173                 ereport(ERROR,
1174                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1175                                  errmsg("COPY escape must be a single one-byte character")));
1176
1177         /* Check force_quote */
1178         if (!cstate->csv_mode && (cstate->force_quote || cstate->force_quote_all))
1179                 ereport(ERROR,
1180                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1181                                  errmsg("COPY force quote available only in CSV mode")));
1182         if ((cstate->force_quote || cstate->force_quote_all) && is_from)
1183                 ereport(ERROR,
1184                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1185                                  errmsg("COPY force quote only available using COPY TO")));
1186
1187         /* Check force_notnull */
1188         if (!cstate->csv_mode && cstate->force_notnull != NIL)
1189                 ereport(ERROR,
1190                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1191                                  errmsg("COPY force not null available only in CSV mode")));
1192         if (cstate->force_notnull != NIL && !is_from)
1193                 ereport(ERROR,
1194                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1195                           errmsg("COPY force not null only available using COPY FROM")));
1196
1197         /* Check force_null */
1198         if (!cstate->csv_mode && cstate->force_null != NIL)
1199                 ereport(ERROR,
1200                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1201                                  errmsg("COPY force null available only in CSV mode")));
1202
1203         if (cstate->force_null != NIL && !is_from)
1204                 ereport(ERROR,
1205                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1206                                  errmsg("COPY force null only available using COPY FROM")));
1207
1208         /* Don't allow the delimiter to appear in the null string. */
1209         if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
1210                 ereport(ERROR,
1211                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1212                 errmsg("COPY delimiter must not appear in the NULL specification")));
1213
1214         /* Don't allow the CSV quote char to appear in the null string. */
1215         if (cstate->csv_mode &&
1216                 strchr(cstate->null_print, cstate->quote[0]) != NULL)
1217                 ereport(ERROR,
1218                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1219                                  errmsg("CSV quote character must not appear in the NULL specification")));
1220 }
1221
1222 /*
1223  * Common setup routines used by BeginCopyFrom and BeginCopyTo.
1224  *
1225  * Iff <binary>, unload or reload in the binary format, as opposed to the
1226  * more wasteful but more robust and portable text format.
1227  *
1228  * Iff <oids>, unload or reload the format that includes OID information.
1229  * On input, we accept OIDs whether or not the table has an OID column,
1230  * but silently drop them if it does not.  On output, we report an error
1231  * if the user asks for OIDs in a table that has none (not providing an
1232  * OID column might seem friendlier, but could seriously confuse programs).
1233  *
1234  * If in the text format, delimit columns with delimiter <delim> and print
1235  * NULL values as <null_print>.
1236  */
1237 static CopyState
1238 BeginCopy(bool is_from,
1239                   Relation rel,
1240                   Node *raw_query,
1241                   const char *queryString,
1242                   List *attnamelist,
1243                   List *options)
1244 {
1245         CopyState       cstate;
1246         TupleDesc       tupDesc;
1247         int                     num_phys_attrs;
1248         MemoryContext oldcontext;
1249
1250         /* Allocate workspace and zero all fields */
1251         cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
1252
1253         /*
1254          * We allocate everything used by a cstate in a new memory context. This
1255          * avoids memory leaks during repeated use of COPY in a query.
1256          */
1257         cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
1258                                                                                                 "COPY",
1259                                                                                                 ALLOCSET_DEFAULT_MINSIZE,
1260                                                                                                 ALLOCSET_DEFAULT_INITSIZE,
1261                                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
1262
1263         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1264
1265         /* Extract options from the statement node tree */
1266         ProcessCopyOptions(cstate, is_from, options);
1267
1268         /* Process the source/target relation or query */
1269         if (rel)
1270         {
1271                 Assert(!raw_query);
1272
1273                 cstate->rel = rel;
1274
1275                 tupDesc = RelationGetDescr(cstate->rel);
1276
1277                 /* Don't allow COPY w/ OIDs to or from a table without them */
1278                 if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
1279                         ereport(ERROR,
1280                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
1281                                          errmsg("table \"%s\" does not have OIDs",
1282                                                         RelationGetRelationName(cstate->rel))));
1283         }
1284         else
1285         {
1286                 List       *rewritten;
1287                 Query      *query;
1288                 PlannedStmt *plan;
1289                 DestReceiver *dest;
1290
1291                 Assert(!is_from);
1292                 cstate->rel = NULL;
1293
1294                 /* Don't allow COPY w/ OIDs from a select */
1295                 if (cstate->oids)
1296                         ereport(ERROR,
1297                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1298                                          errmsg("COPY (SELECT) WITH OIDS is not supported")));
1299
1300                 /*
1301                  * Run parse analysis and rewrite.  Note this also acquires sufficient
1302                  * locks on the source table(s).
1303                  *
1304                  * Because the parser and planner tend to scribble on their input, we
1305                  * make a preliminary copy of the source querytree.  This prevents
1306                  * problems in the case that the COPY is in a portal or plpgsql
1307                  * function and is executed repeatedly.  (See also the same hack in
1308                  * DECLARE CURSOR and PREPARE.)  XXX FIXME someday.
1309                  */
1310                 rewritten = pg_analyze_and_rewrite((Node *) copyObject(raw_query),
1311                                                                                    queryString, NULL, 0);
1312
1313                 /* We don't expect more or less than one result query */
1314                 if (list_length(rewritten) != 1)
1315                         elog(ERROR, "unexpected rewrite result");
1316
1317                 query = (Query *) linitial(rewritten);
1318
1319                 /* The grammar allows SELECT INTO, but we don't support that */
1320                 if (query->utilityStmt != NULL &&
1321                         IsA(query->utilityStmt, CreateTableAsStmt))
1322                         ereport(ERROR,
1323                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1324                                          errmsg("COPY (SELECT INTO) is not supported")));
1325
1326                 Assert(query->commandType == CMD_SELECT);
1327                 Assert(query->utilityStmt == NULL);
1328
1329                 /* plan the query */
1330                 plan = planner(query, 0, NULL);
1331
1332                 /*
1333                  * Use a snapshot with an updated command ID to ensure this query sees
1334                  * results of any previously executed queries.
1335                  */
1336                 PushCopiedSnapshot(GetActiveSnapshot());
1337                 UpdateActiveSnapshotCommandId();
1338
1339                 /* Create dest receiver for COPY OUT */
1340                 dest = CreateDestReceiver(DestCopyOut);
1341                 ((DR_copy *) dest)->cstate = cstate;
1342
1343                 /* Create a QueryDesc requesting no output */
1344                 cstate->queryDesc = CreateQueryDesc(plan, queryString,
1345                                                                                         GetActiveSnapshot(),
1346                                                                                         InvalidSnapshot,
1347                                                                                         dest, NULL, 0);
1348
1349                 /*
1350                  * Call ExecutorStart to prepare the plan for execution.
1351                  *
1352                  * ExecutorStart computes a result tupdesc for us
1353                  */
1354                 ExecutorStart(cstate->queryDesc, 0);
1355
1356                 tupDesc = cstate->queryDesc->tupDesc;
1357         }
1358
1359         /* Generate or convert list of attributes to process */
1360         cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1361
1362         num_phys_attrs = tupDesc->natts;
1363
1364         /* Convert FORCE QUOTE name list to per-column flags, check validity */
1365         cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1366         if (cstate->force_quote_all)
1367         {
1368                 int                     i;
1369
1370                 for (i = 0; i < num_phys_attrs; i++)
1371                         cstate->force_quote_flags[i] = true;
1372         }
1373         else if (cstate->force_quote)
1374         {
1375                 List       *attnums;
1376                 ListCell   *cur;
1377
1378                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_quote);
1379
1380                 foreach(cur, attnums)
1381                 {
1382                         int                     attnum = lfirst_int(cur);
1383
1384                         if (!list_member_int(cstate->attnumlist, attnum))
1385                                 ereport(ERROR,
1386                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1387                                    errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
1388                                                   NameStr(tupDesc->attrs[attnum - 1]->attname))));
1389                         cstate->force_quote_flags[attnum - 1] = true;
1390                 }
1391         }
1392
1393         /* Convert FORCE NOT NULL name list to per-column flags, check validity */
1394         cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1395         if (cstate->force_notnull)
1396         {
1397                 List       *attnums;
1398                 ListCell   *cur;
1399
1400                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_notnull);
1401
1402                 foreach(cur, attnums)
1403                 {
1404                         int                     attnum = lfirst_int(cur);
1405
1406                         if (!list_member_int(cstate->attnumlist, attnum))
1407                                 ereport(ERROR,
1408                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1409                                 errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
1410                                            NameStr(tupDesc->attrs[attnum - 1]->attname))));
1411                         cstate->force_notnull_flags[attnum - 1] = true;
1412                 }
1413         }
1414
1415         /* Convert FORCE NULL name list to per-column flags, check validity */
1416         cstate->force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1417         if (cstate->force_null)
1418         {
1419                 List       *attnums;
1420                 ListCell   *cur;
1421
1422                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_null);
1423
1424                 foreach(cur, attnums)
1425                 {
1426                         int                     attnum = lfirst_int(cur);
1427
1428                         if (!list_member_int(cstate->attnumlist, attnum))
1429                                 ereport(ERROR,
1430                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1431                                         errmsg("FORCE NULL column \"%s\" not referenced by COPY",
1432                                                    NameStr(tupDesc->attrs[attnum - 1]->attname))));
1433                         cstate->force_null_flags[attnum - 1] = true;
1434                 }
1435         }
1436
1437         /* Convert convert_selectively name list to per-column flags */
1438         if (cstate->convert_selectively)
1439         {
1440                 List       *attnums;
1441                 ListCell   *cur;
1442
1443                 cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1444
1445                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->convert_select);
1446
1447                 foreach(cur, attnums)
1448                 {
1449                         int                     attnum = lfirst_int(cur);
1450
1451                         if (!list_member_int(cstate->attnumlist, attnum))
1452                                 ereport(ERROR,
1453                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1454                                                  errmsg_internal("selected column \"%s\" not referenced by COPY",
1455                                                          NameStr(tupDesc->attrs[attnum - 1]->attname))));
1456                         cstate->convert_select_flags[attnum - 1] = true;
1457                 }
1458         }
1459
1460         /* Use client encoding when ENCODING option is not specified. */
1461         if (cstate->file_encoding < 0)
1462                 cstate->file_encoding = pg_get_client_encoding();
1463
1464         /*
1465          * Set up encoding conversion info.  Even if the file and server encodings
1466          * are the same, we must apply pg_any_to_server() to validate data in
1467          * multibyte encodings.
1468          */
1469         cstate->need_transcoding =
1470                 (cstate->file_encoding != GetDatabaseEncoding() ||
1471                  pg_database_encoding_max_length() > 1);
1472         /* See Multibyte encoding comment above */
1473         cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
1474
1475         cstate->copy_dest = COPY_FILE;          /* default */
1476
1477         MemoryContextSwitchTo(oldcontext);
1478
1479         return cstate;
1480 }
1481
1482 /*
1483  * Closes the pipe to an external program, checking the pclose() return code.
1484  */
1485 static void
1486 ClosePipeToProgram(CopyState cstate)
1487 {
1488         int                     pclose_rc;
1489
1490         Assert(cstate->is_program);
1491
1492         pclose_rc = ClosePipeStream(cstate->copy_file);
1493         if (pclose_rc == -1)
1494                 ereport(ERROR,
1495                                 (errmsg("could not close pipe to external command: %m")));
1496         else if (pclose_rc != 0)
1497                 ereport(ERROR,
1498                                 (errmsg("program \"%s\" failed",
1499                                                 cstate->filename),
1500                                  errdetail_internal("%s", wait_result_to_str(pclose_rc))));
1501 }
1502
1503 /*
1504  * Release resources allocated in a cstate for COPY TO/FROM.
1505  */
1506 static void
1507 EndCopy(CopyState cstate)
1508 {
1509         if (cstate->is_program)
1510         {
1511                 ClosePipeToProgram(cstate);
1512         }
1513         else
1514         {
1515                 if (cstate->filename != NULL && FreeFile(cstate->copy_file))
1516                         ereport(ERROR,
1517                                         (errcode_for_file_access(),
1518                                          errmsg("could not close file \"%s\": %m",
1519                                                         cstate->filename)));
1520         }
1521
1522         MemoryContextDelete(cstate->copycontext);
1523         pfree(cstate);
1524 }
1525
1526 /*
1527  * Setup CopyState to read tuples from a table or a query for COPY TO.
1528  */
1529 static CopyState
1530 BeginCopyTo(Relation rel,
1531                         Node *query,
1532                         const char *queryString,
1533                         const char *filename,
1534                         bool is_program,
1535                         List *attnamelist,
1536                         List *options)
1537 {
1538         CopyState       cstate;
1539         bool            pipe = (filename == NULL);
1540         MemoryContext oldcontext;
1541
1542         if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
1543         {
1544                 if (rel->rd_rel->relkind == RELKIND_VIEW)
1545                         ereport(ERROR,
1546                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1547                                          errmsg("cannot copy from view \"%s\"",
1548                                                         RelationGetRelationName(rel)),
1549                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1550                 else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
1551                         ereport(ERROR,
1552                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1553                                          errmsg("cannot copy from materialized view \"%s\"",
1554                                                         RelationGetRelationName(rel)),
1555                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1556                 else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1557                         ereport(ERROR,
1558                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1559                                          errmsg("cannot copy from foreign table \"%s\"",
1560                                                         RelationGetRelationName(rel)),
1561                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1562                 else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
1563                         ereport(ERROR,
1564                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1565                                          errmsg("cannot copy from sequence \"%s\"",
1566                                                         RelationGetRelationName(rel))));
1567                 else
1568                         ereport(ERROR,
1569                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1570                                          errmsg("cannot copy from non-table relation \"%s\"",
1571                                                         RelationGetRelationName(rel))));
1572         }
1573
1574         cstate = BeginCopy(false, rel, query, queryString, attnamelist, options);
1575         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1576
1577         if (pipe)
1578         {
1579                 Assert(!is_program);    /* the grammar does not allow this */
1580                 if (whereToSendOutput != DestRemote)
1581                         cstate->copy_file = stdout;
1582         }
1583         else
1584         {
1585                 cstate->filename = pstrdup(filename);
1586                 cstate->is_program = is_program;
1587
1588                 if (is_program)
1589                 {
1590                         cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
1591                         if (cstate->copy_file == NULL)
1592                                 ereport(ERROR,
1593                                                 (errmsg("could not execute command \"%s\": %m",
1594                                                                 cstate->filename)));
1595                 }
1596                 else
1597                 {
1598                         mode_t          oumask; /* Pre-existing umask value */
1599                         struct stat st;
1600
1601                         /*
1602                          * Prevent write to relative path ... too easy to shoot oneself in
1603                          * the foot by overwriting a database file ...
1604                          */
1605                         if (!is_absolute_path(filename))
1606                                 ereport(ERROR,
1607                                                 (errcode(ERRCODE_INVALID_NAME),
1608                                           errmsg("relative path not allowed for COPY to file")));
1609
1610                         oumask = umask(S_IWGRP | S_IWOTH);
1611                         cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1612                         umask(oumask);
1613                         if (cstate->copy_file == NULL)
1614                                 ereport(ERROR,
1615                                                 (errcode_for_file_access(),
1616                                                  errmsg("could not open file \"%s\" for writing: %m",
1617                                                                 cstate->filename)));
1618
1619                         fstat(fileno(cstate->copy_file), &st);
1620                         if (S_ISDIR(st.st_mode))
1621                                 ereport(ERROR,
1622                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1623                                                  errmsg("\"%s\" is a directory", cstate->filename)));
1624                 }
1625         }
1626
1627         MemoryContextSwitchTo(oldcontext);
1628
1629         return cstate;
1630 }
1631
1632 /*
1633  * This intermediate routine exists mainly to localize the effects of setjmp
1634  * so we don't need to plaster a lot of variables with "volatile".
1635  */
1636 static uint64
1637 DoCopyTo(CopyState cstate)
1638 {
1639         bool            pipe = (cstate->filename == NULL);
1640         bool            fe_copy = (pipe && whereToSendOutput == DestRemote);
1641         uint64          processed;
1642
1643         PG_TRY();
1644         {
1645                 if (fe_copy)
1646                         SendCopyBegin(cstate);
1647
1648                 processed = CopyTo(cstate);
1649
1650                 if (fe_copy)
1651                         SendCopyEnd(cstate);
1652         }
1653         PG_CATCH();
1654         {
1655                 /*
1656                  * Make sure we turn off old-style COPY OUT mode upon error. It is
1657                  * okay to do this in all cases, since it does nothing if the mode is
1658                  * not on.
1659                  */
1660                 pq_endcopyout(true);
1661                 PG_RE_THROW();
1662         }
1663         PG_END_TRY();
1664
1665         return processed;
1666 }
1667
1668 /*
1669  * Clean up storage and release resources for COPY TO.
1670  */
1671 static void
1672 EndCopyTo(CopyState cstate)
1673 {
1674         if (cstate->queryDesc != NULL)
1675         {
1676                 /* Close down the query and free resources. */
1677                 ExecutorFinish(cstate->queryDesc);
1678                 ExecutorEnd(cstate->queryDesc);
1679                 FreeQueryDesc(cstate->queryDesc);
1680                 PopActiveSnapshot();
1681         }
1682
1683         /* Clean up storage */
1684         EndCopy(cstate);
1685 }
1686
1687 /*
1688  * Copy from relation or query TO file.
1689  */
1690 static uint64
1691 CopyTo(CopyState cstate)
1692 {
1693         TupleDesc       tupDesc;
1694         int                     num_phys_attrs;
1695         Form_pg_attribute *attr;
1696         ListCell   *cur;
1697         uint64          processed;
1698
1699         if (cstate->rel)
1700                 tupDesc = RelationGetDescr(cstate->rel);
1701         else
1702                 tupDesc = cstate->queryDesc->tupDesc;
1703         attr = tupDesc->attrs;
1704         num_phys_attrs = tupDesc->natts;
1705         cstate->null_print_client = cstate->null_print;         /* default */
1706
1707         /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1708         cstate->fe_msgbuf = makeStringInfo();
1709
1710         /* Get info about the columns we need to process. */
1711         cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1712         foreach(cur, cstate->attnumlist)
1713         {
1714                 int                     attnum = lfirst_int(cur);
1715                 Oid                     out_func_oid;
1716                 bool            isvarlena;
1717
1718                 if (cstate->binary)
1719                         getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
1720                                                                         &out_func_oid,
1721                                                                         &isvarlena);
1722                 else
1723                         getTypeOutputInfo(attr[attnum - 1]->atttypid,
1724                                                           &out_func_oid,
1725                                                           &isvarlena);
1726                 fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
1727         }
1728
1729         /*
1730          * Create a temporary memory context that we can reset once per row to
1731          * recover palloc'd memory.  This avoids any problems with leaks inside
1732          * datatype output routines, and should be faster than retail pfree's
1733          * anyway.  (We don't need a whole econtext as CopyFrom does.)
1734          */
1735         cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1736                                                                                            "COPY TO",
1737                                                                                            ALLOCSET_DEFAULT_MINSIZE,
1738                                                                                            ALLOCSET_DEFAULT_INITSIZE,
1739                                                                                            ALLOCSET_DEFAULT_MAXSIZE);
1740
1741         if (cstate->binary)
1742         {
1743                 /* Generate header for a binary copy */
1744                 int32           tmp;
1745
1746                 /* Signature */
1747                 CopySendData(cstate, BinarySignature, 11);
1748                 /* Flags field */
1749                 tmp = 0;
1750                 if (cstate->oids)
1751                         tmp |= (1 << 16);
1752                 CopySendInt32(cstate, tmp);
1753                 /* No header extension */
1754                 tmp = 0;
1755                 CopySendInt32(cstate, tmp);
1756         }
1757         else
1758         {
1759                 /*
1760                  * For non-binary copy, we need to convert null_print to file
1761                  * encoding, because it will be sent directly with CopySendString.
1762                  */
1763                 if (cstate->need_transcoding)
1764                         cstate->null_print_client = pg_server_to_any(cstate->null_print,
1765                                                                                                           cstate->null_print_len,
1766                                                                                                           cstate->file_encoding);
1767
1768                 /* if a header has been requested send the line */
1769                 if (cstate->header_line)
1770                 {
1771                         bool            hdr_delim = false;
1772
1773                         foreach(cur, cstate->attnumlist)
1774                         {
1775                                 int                     attnum = lfirst_int(cur);
1776                                 char       *colname;
1777
1778                                 if (hdr_delim)
1779                                         CopySendChar(cstate, cstate->delim[0]);
1780                                 hdr_delim = true;
1781
1782                                 colname = NameStr(attr[attnum - 1]->attname);
1783
1784                                 CopyAttributeOutCSV(cstate, colname, false,
1785                                                                         list_length(cstate->attnumlist) == 1);
1786                         }
1787
1788                         CopySendEndOfRow(cstate);
1789                 }
1790         }
1791
1792         if (cstate->rel)
1793         {
1794                 Datum      *values;
1795                 bool       *nulls;
1796                 HeapScanDesc scandesc;
1797                 HeapTuple       tuple;
1798
1799                 values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1800                 nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
1801
1802                 scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1803
1804                 processed = 0;
1805                 while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
1806                 {
1807                         CHECK_FOR_INTERRUPTS();
1808
1809                         /* Deconstruct the tuple ... faster than repeated heap_getattr */
1810                         heap_deform_tuple(tuple, tupDesc, values, nulls);
1811
1812                         /* Format and send the data */
1813                         CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
1814                         processed++;
1815                 }
1816
1817                 heap_endscan(scandesc);
1818
1819                 pfree(values);
1820                 pfree(nulls);
1821         }
1822         else
1823         {
1824                 /* run the plan --- the dest receiver will send tuples */
1825                 ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
1826                 processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1827         }
1828
1829         if (cstate->binary)
1830         {
1831                 /* Generate trailer for a binary copy */
1832                 CopySendInt16(cstate, -1);
1833                 /* Need to flush out the trailer */
1834                 CopySendEndOfRow(cstate);
1835         }
1836
1837         MemoryContextDelete(cstate->rowcontext);
1838
1839         return processed;
1840 }
1841
1842 /*
1843  * Emit one row during CopyTo().
1844  */
1845 static void
1846 CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
1847 {
1848         bool            need_delim = false;
1849         FmgrInfo   *out_functions = cstate->out_functions;
1850         MemoryContext oldcontext;
1851         ListCell   *cur;
1852         char       *string;
1853
1854         MemoryContextReset(cstate->rowcontext);
1855         oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1856
1857         if (cstate->binary)
1858         {
1859                 /* Binary per-tuple header */
1860                 CopySendInt16(cstate, list_length(cstate->attnumlist));
1861                 /* Send OID if wanted --- note attnumlist doesn't include it */
1862                 if (cstate->oids)
1863                 {
1864                         /* Hack --- assume Oid is same size as int32 */
1865                         CopySendInt32(cstate, sizeof(int32));
1866                         CopySendInt32(cstate, tupleOid);
1867                 }
1868         }
1869         else
1870         {
1871                 /* Text format has no per-tuple header, but send OID if wanted */
1872                 /* Assume digits don't need any quoting or encoding conversion */
1873                 if (cstate->oids)
1874                 {
1875                         string = DatumGetCString(DirectFunctionCall1(oidout,
1876                                                                                                 ObjectIdGetDatum(tupleOid)));
1877                         CopySendString(cstate, string);
1878                         need_delim = true;
1879                 }
1880         }
1881
1882         foreach(cur, cstate->attnumlist)
1883         {
1884                 int                     attnum = lfirst_int(cur);
1885                 Datum           value = values[attnum - 1];
1886                 bool            isnull = nulls[attnum - 1];
1887
1888                 if (!cstate->binary)
1889                 {
1890                         if (need_delim)
1891                                 CopySendChar(cstate, cstate->delim[0]);
1892                         need_delim = true;
1893                 }
1894
1895                 if (isnull)
1896                 {
1897                         if (!cstate->binary)
1898                                 CopySendString(cstate, cstate->null_print_client);
1899                         else
1900                                 CopySendInt32(cstate, -1);
1901                 }
1902                 else
1903                 {
1904                         if (!cstate->binary)
1905                         {
1906                                 string = OutputFunctionCall(&out_functions[attnum - 1],
1907                                                                                         value);
1908                                 if (cstate->csv_mode)
1909                                         CopyAttributeOutCSV(cstate, string,
1910                                                                                 cstate->force_quote_flags[attnum - 1],
1911                                                                                 list_length(cstate->attnumlist) == 1);
1912                                 else
1913                                         CopyAttributeOutText(cstate, string);
1914                         }
1915                         else
1916                         {
1917                                 bytea      *outputbytes;
1918
1919                                 outputbytes = SendFunctionCall(&out_functions[attnum - 1],
1920                                                                                            value);
1921                                 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
1922                                 CopySendData(cstate, VARDATA(outputbytes),
1923                                                          VARSIZE(outputbytes) - VARHDRSZ);
1924                         }
1925                 }
1926         }
1927
1928         CopySendEndOfRow(cstate);
1929
1930         MemoryContextSwitchTo(oldcontext);
1931 }
1932
1933
1934 /*
1935  * error context callback for COPY FROM
1936  *
1937  * The argument for the error context must be CopyState.
1938  */
1939 void
1940 CopyFromErrorCallback(void *arg)
1941 {
1942         CopyState       cstate = (CopyState) arg;
1943
1944         if (cstate->binary)
1945         {
1946                 /* can't usefully display the data */
1947                 if (cstate->cur_attname)
1948                         errcontext("COPY %s, line %d, column %s",
1949                                            cstate->cur_relname, cstate->cur_lineno,
1950                                            cstate->cur_attname);
1951                 else
1952                         errcontext("COPY %s, line %d",
1953                                            cstate->cur_relname, cstate->cur_lineno);
1954         }
1955         else
1956         {
1957                 if (cstate->cur_attname && cstate->cur_attval)
1958                 {
1959                         /* error is relevant to a particular column */
1960                         char       *attval;
1961
1962                         attval = limit_printout_length(cstate->cur_attval);
1963                         errcontext("COPY %s, line %d, column %s: \"%s\"",
1964                                            cstate->cur_relname, cstate->cur_lineno,
1965                                            cstate->cur_attname, attval);
1966                         pfree(attval);
1967                 }
1968                 else if (cstate->cur_attname)
1969                 {
1970                         /* error is relevant to a particular column, value is NULL */
1971                         errcontext("COPY %s, line %d, column %s: null input",
1972                                            cstate->cur_relname, cstate->cur_lineno,
1973                                            cstate->cur_attname);
1974                 }
1975                 else
1976                 {
1977                         /*
1978                          * Error is relevant to a particular line.
1979                          *
1980                          * If line_buf still contains the correct line, and it's already
1981                          * transcoded, print it. If it's still in a foreign encoding, it's
1982                          * quite likely that the error is precisely a failure to do
1983                          * encoding conversion (ie, bad data). We dare not try to convert
1984                          * it, and at present there's no way to regurgitate it without
1985                          * conversion. So we have to punt and just report the line number.
1986                          */
1987                         if (cstate->line_buf_valid &&
1988                                 (cstate->line_buf_converted || !cstate->need_transcoding))
1989                         {
1990                                 char       *lineval;
1991
1992                                 lineval = limit_printout_length(cstate->line_buf.data);
1993                                 errcontext("COPY %s, line %d: \"%s\"",
1994                                                    cstate->cur_relname, cstate->cur_lineno, lineval);
1995                                 pfree(lineval);
1996                         }
1997                         else
1998                         {
1999                                 errcontext("COPY %s, line %d",
2000                                                    cstate->cur_relname, cstate->cur_lineno);
2001                         }
2002                 }
2003         }
2004 }
2005
/*
 * Produce a bounded-length copy of 'str' for inclusion in a COPY error
 * context message.
 *
 * Inputs of at most MAX_COPY_DATA_DISPLAY bytes are duplicated unchanged.
 * Longer inputs are clipped with pg_mbcliplen() (encoding-dependent
 * truncation) and "..." is appended to show that the text was shortened.
 *
 * Using the sprintf "precision" limit would look simpler, but some versions
 * of glibc have a bug/misfeature where vsnprintf always fails (returns -1)
 * when asked to truncate a string containing byte sequences that are invalid
 * in the current encoding.  Hence the truncation is done by hand here.
 *
 * The result is always a freshly allocated string (pstrdup or palloc); the
 * callers in this file pfree it once the message has been built.
 */
static char *
limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

	int			input_len = strlen(str);
	int			clipped_len;
	char	   *result;

	if (input_len > MAX_COPY_DATA_DISPLAY)
	{
		/* Too long: find the encoding-dependent clip point */
		clipped_len = pg_mbcliplen(str, input_len, MAX_COPY_DATA_DISPLAY);

		/* Room for the clipped text, the three dots and the terminator */
		result = (char *) palloc(clipped_len + 4);
		memcpy(result, str, clipped_len);
		memcpy(result + clipped_len, "...", 4);

		return result;
	}

	/* Short enough to be shown as-is */
	return pstrdup(str);
}
2040
2041 /*
2042  * Copy FROM file to relation.
2043  */
2044 static uint64
2045 CopyFrom(CopyState cstate)
2046 {
2047         HeapTuple       tuple;
2048         TupleDesc       tupDesc;
2049         Datum      *values;
2050         bool       *nulls;
2051         ResultRelInfo *resultRelInfo;
2052         EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
2053         ExprContext *econtext;
2054         TupleTableSlot *myslot;
2055         MemoryContext oldcontext = CurrentMemoryContext;
2056
2057         ErrorContextCallback errcallback;
2058         CommandId       mycid = GetCurrentCommandId(true);
2059         int                     hi_options = 0; /* start with default heap_insert options */
2060         BulkInsertState bistate;
2061         uint64          processed = 0;
2062         bool            useHeapMultiInsert;
2063         int                     nBufferedTuples = 0;
2064
2065 #define MAX_BUFFERED_TUPLES 1000
2066         HeapTuple  *bufferedTuples = NULL;      /* initialize to silence warning */
2067         Size            bufferedTuplesSize = 0;
2068         int                     firstBufferedLineNo = 0;
2069
2070         Assert(cstate->rel);
2071
2072         if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
2073         {
2074                 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
2075                         ereport(ERROR,
2076                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2077                                          errmsg("cannot copy to view \"%s\"",
2078                                                         RelationGetRelationName(cstate->rel))));
2079                 else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
2080                         ereport(ERROR,
2081                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2082                                          errmsg("cannot copy to materialized view \"%s\"",
2083                                                         RelationGetRelationName(cstate->rel))));
2084                 else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
2085                         ereport(ERROR,
2086                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2087                                          errmsg("cannot copy to foreign table \"%s\"",
2088                                                         RelationGetRelationName(cstate->rel))));
2089                 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
2090                         ereport(ERROR,
2091                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2092                                          errmsg("cannot copy to sequence \"%s\"",
2093                                                         RelationGetRelationName(cstate->rel))));
2094                 else
2095                         ereport(ERROR,
2096                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2097                                          errmsg("cannot copy to non-table relation \"%s\"",
2098                                                         RelationGetRelationName(cstate->rel))));
2099         }
2100
2101         tupDesc = RelationGetDescr(cstate->rel);
2102
2103         /*----------
2104          * Check to see if we can avoid writing WAL
2105          *
2106          * If archive logging/streaming is not enabled *and* either
2107          *      - table was created in same transaction as this COPY
2108          *      - data is being written to relfilenode created in this transaction
2109          * then we can skip writing WAL.  It's safe because if the transaction
2110          * doesn't commit, we'll discard the table (or the new relfilenode file).
2111          * If it does commit, we'll have done the heap_sync at the bottom of this
2112          * routine first.
2113          *
2114          * As mentioned in comments in utils/rel.h, the in-same-transaction test
2115          * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
2116          * can be cleared before the end of the transaction. The exact case is
2117          * when a relation sets a new relfilenode twice in same transaction, yet
2118          * the second one fails in an aborted subtransaction, e.g.
2119          *
2120          * BEGIN;
2121          * TRUNCATE t;
2122          * SAVEPOINT save;
2123          * TRUNCATE t;
2124          * ROLLBACK TO save;
2125          * COPY ...
2126          *
2127          * Also, if the target file is new-in-transaction, we assume that checking
2128          * FSM for free space is a waste of time, even if we must use WAL because
2129          * of archiving.  This could possibly be wrong, but it's unlikely.
2130          *
2131          * The comments for heap_insert and RelationGetBufferForTuple specify that
2132          * skipping WAL logging is only safe if we ensure that our tuples do not
2133          * go into pages containing tuples from any other transactions --- but this
2134          * must be the case if we have a new table or new relfilenode, so we need
2135          * no additional work to enforce that.
2136          *----------
2137          */
2138         /* createSubid is creation check, newRelfilenodeSubid is truncation check */
2139         if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
2140                 cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2141         {
2142                 hi_options |= HEAP_INSERT_SKIP_FSM;
2143                 if (!XLogIsNeeded())
2144                         hi_options |= HEAP_INSERT_SKIP_WAL;
2145         }
2146
2147         /*
2148          * Optimize if new relfilenode was created in this subxact or one of its
2149          * committed children and we won't see those rows later as part of an
2150          * earlier scan or command. This ensures that if this subtransaction
2151          * aborts then the frozen rows won't be visible after xact cleanup. Note
2152          * that the stronger test of exactly which subtransaction created it is
2153          * crucial for correctness of this optimisation.
2154          */
2155         if (cstate->freeze)
2156         {
2157                 if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
2158                         ereport(ERROR,
2159                                         (ERRCODE_INVALID_TRANSACTION_STATE,
2160                                          errmsg("cannot perform FREEZE because of prior transaction activity")));
2161
2162                 if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
2163                  cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
2164                         ereport(ERROR,
2165                                         (ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE,
2166                                          errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
2167
2168                 hi_options |= HEAP_INSERT_FROZEN;
2169         }
2170
2171         /*
2172          * We need a ResultRelInfo so we can use the regular executor's
2173          * index-entry-making machinery.  (There used to be a huge amount of code
2174          * here that basically duplicated execUtils.c ...)
2175          */
2176         resultRelInfo = makeNode(ResultRelInfo);
2177         InitResultRelInfo(resultRelInfo,
2178                                           cstate->rel,
2179                                           1,            /* dummy rangetable index */
2180                                           0);
2181
2182         ExecOpenIndices(resultRelInfo);
2183
2184         estate->es_result_relations = resultRelInfo;
2185         estate->es_num_result_relations = 1;
2186         estate->es_result_relation_info = resultRelInfo;
2187
2188         /* Set up a tuple slot too */
2189         myslot = ExecInitExtraTupleSlot(estate);
2190         ExecSetSlotDescriptor(myslot, tupDesc);
2191         /* Triggers might need a slot as well */
2192         estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
2193
2194         /*
2195          * It's more efficient to prepare a bunch of tuples for insertion, and
2196          * insert them in one heap_multi_insert() call, than call heap_insert()
2197          * separately for every tuple. However, we can't do that if there are
2198          * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
2199          * expressions. Such triggers or expressions might query the table we're
2200          * inserting to, and act differently if the tuples that have already been
2201          * processed and prepared for insertion are not there.
2202          */
2203         if ((resultRelInfo->ri_TrigDesc != NULL &&
2204                  (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2205                   resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
2206                 cstate->volatile_defexprs)
2207         {
2208                 useHeapMultiInsert = false;
2209         }
2210         else
2211         {
2212                 useHeapMultiInsert = true;
2213                 bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
2214         }
2215
2216         /* Prepare to catch AFTER triggers. */
2217         AfterTriggerBeginQuery();
2218
2219         /*
2220          * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
2221          * should do this for COPY, since it's not really an "INSERT" statement as
2222          * such. However, executing these triggers maintains consistency with the
2223          * EACH ROW triggers that we already fire on COPY.
2224          */
2225         ExecBSInsertTriggers(estate, resultRelInfo);
2226
2227         values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
2228         nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
2229
2230         bistate = GetBulkInsertState();
2231         econtext = GetPerTupleExprContext(estate);
2232
2233         /* Set up callback to identify error line number */
2234         errcallback.callback = CopyFromErrorCallback;
2235         errcallback.arg = (void *) cstate;
2236         errcallback.previous = error_context_stack;
2237         error_context_stack = &errcallback;
2238
2239         for (;;)
2240         {
2241                 TupleTableSlot *slot;
2242                 bool            skip_tuple;
2243                 Oid                     loaded_oid = InvalidOid;
2244
2245                 CHECK_FOR_INTERRUPTS();
2246
2247                 if (nBufferedTuples == 0)
2248                 {
2249                         /*
2250                          * Reset the per-tuple exprcontext. We can only do this if the
2251                          * tuple buffer is empty. (Calling the context the per-tuple
2252                          * memory context is a bit of a misnomer now.)
2253                          */
2254                         ResetPerTupleExprContext(estate);
2255                 }
2256
2257                 /* Switch into its memory context */
2258                 MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2259
2260                 if (!NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid))
2261                         break;
2262
2263                 /* And now we can form the input tuple. */
2264                 tuple = heap_form_tuple(tupDesc, values, nulls);
2265
2266                 if (loaded_oid != InvalidOid)
2267                         HeapTupleSetOid(tuple, loaded_oid);
2268
2269                 /*
2270                  * Constraints might reference the tableoid column, so initialize
2271                  * t_tableOid before evaluating them.
2272                  */
2273                 tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
2274
2275                 /* Triggers and stuff need to be invoked in query context. */
2276                 MemoryContextSwitchTo(oldcontext);
2277
2278                 /* Place tuple in tuple slot --- but slot shouldn't free it */
2279                 slot = myslot;
2280                 ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2281
2282                 skip_tuple = false;
2283
2284                 /* BEFORE ROW INSERT Triggers */
2285                 if (resultRelInfo->ri_TrigDesc &&
2286                         resultRelInfo->ri_TrigDesc->trig_insert_before_row)
2287                 {
2288                         slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
2289
2290                         if (slot == NULL)       /* "do nothing" */
2291                                 skip_tuple = true;
2292                         else    /* trigger might have changed tuple */
2293                                 tuple = ExecMaterializeSlot(slot);
2294                 }
2295
2296                 if (!skip_tuple)
2297                 {
2298                         /* Check the constraints of the tuple */
2299                         if (cstate->rel->rd_att->constr)
2300                                 ExecConstraints(resultRelInfo, slot, estate);
2301
2302                         if (useHeapMultiInsert)
2303                         {
2304                                 /* Add this tuple to the tuple buffer */
2305                                 if (nBufferedTuples == 0)
2306                                         firstBufferedLineNo = cstate->cur_lineno;
2307                                 bufferedTuples[nBufferedTuples++] = tuple;
2308                                 bufferedTuplesSize += tuple->t_len;
2309
2310                                 /*
2311                                  * If the buffer filled up, flush it. Also flush if the total
2312                                  * size of all the tuples in the buffer becomes large, to
2313                                  * avoid using large amounts of memory for the buffers when
2314                                  * the tuples are exceptionally wide.
2315                                  */
2316                                 if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
2317                                         bufferedTuplesSize > 65535)
2318                                 {
2319                                         CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2320                                                                                 resultRelInfo, myslot, bistate,
2321                                                                                 nBufferedTuples, bufferedTuples,
2322                                                                                 firstBufferedLineNo);
2323                                         nBufferedTuples = 0;
2324                                         bufferedTuplesSize = 0;
2325                                 }
2326                         }
2327                         else
2328                         {
2329                                 List       *recheckIndexes = NIL;
2330
2331                                 /* OK, store the tuple and create index entries for it */
2332                                 heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
2333
2334                                 if (resultRelInfo->ri_NumIndices > 0)
2335                                         recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
2336                                                                                                                    estate);
2337
2338                                 /* AFTER ROW INSERT Triggers */
2339                                 ExecARInsertTriggers(estate, resultRelInfo, tuple,
2340                                                                          recheckIndexes);
2341
2342                                 list_free(recheckIndexes);
2343                         }
2344
2345                         /*
2346                          * We count only tuples not suppressed by a BEFORE INSERT trigger;
2347                          * this is the same definition used by execMain.c for counting
2348                          * tuples inserted by an INSERT command.
2349                          */
2350                         processed++;
2351                 }
2352         }
2353
2354         /* Flush any remaining buffered tuples */
2355         if (nBufferedTuples > 0)
2356                 CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2357                                                         resultRelInfo, myslot, bistate,
2358                                                         nBufferedTuples, bufferedTuples,
2359                                                         firstBufferedLineNo);
2360
2361         /* Done, clean up */
2362         error_context_stack = errcallback.previous;
2363
2364         FreeBulkInsertState(bistate);
2365
2366         MemoryContextSwitchTo(oldcontext);
2367
2368         /* Execute AFTER STATEMENT insertion triggers */
2369         ExecASInsertTriggers(estate, resultRelInfo);
2370
2371         /* Handle queued AFTER triggers */
2372         AfterTriggerEndQuery(estate);
2373
2374         pfree(values);
2375         pfree(nulls);
2376
2377         ExecResetTupleTable(estate->es_tupleTable, false);
2378
2379         ExecCloseIndices(resultRelInfo);
2380
2381         FreeExecutorState(estate);
2382
2383         /*
2384          * If we skipped writing WAL, then we need to sync the heap (but not
2385          * indexes since those use WAL anyway)
2386          */
2387         if (hi_options & HEAP_INSERT_SKIP_WAL)
2388                 heap_sync(cstate->rel);
2389
2390         return processed;
2391 }
2392
2393 /*
2394  * A subroutine of CopyFrom, to write the current batch of buffered heap
2395  * tuples to the heap. Also updates indexes and runs AFTER ROW INSERT
2396  * triggers.
2397  */
2398 static void
2399 CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
2400                                         int hi_options, ResultRelInfo *resultRelInfo,
2401                                         TupleTableSlot *myslot, BulkInsertState bistate,
2402                                         int nBufferedTuples, HeapTuple *bufferedTuples,
2403                                         int firstBufferedLineNo)
2404 {
2405         MemoryContext oldcontext;
2406         int                     i;
2407         int                     save_cur_lineno;
2408
2409         /*
2410          * Print error context information correctly, if one of the operations
2411          * below fail.
2412          */
2413         cstate->line_buf_valid = false;
2414         save_cur_lineno = cstate->cur_lineno;
2415
2416         /*
2417          * heap_multi_insert leaks memory, so switch to short-lived memory context
2418          * before calling it.
2419          */
2420         oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2421         heap_multi_insert(cstate->rel,
2422                                           bufferedTuples,
2423                                           nBufferedTuples,
2424                                           mycid,
2425                                           hi_options,
2426                                           bistate);
2427         MemoryContextSwitchTo(oldcontext);
2428
2429         /*
2430          * If there are any indexes, update them for all the inserted tuples, and
2431          * run AFTER ROW INSERT triggers.
2432          */
2433         if (resultRelInfo->ri_NumIndices > 0)
2434         {
2435                 for (i = 0; i < nBufferedTuples; i++)
2436                 {
2437                         List       *recheckIndexes;
2438
2439                         cstate->cur_lineno = firstBufferedLineNo + i;
2440                         ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
2441                         recheckIndexes =
2442                                 ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
2443                                                                           estate);
2444                         ExecARInsertTriggers(estate, resultRelInfo,
2445                                                                  bufferedTuples[i],
2446                                                                  recheckIndexes);
2447                         list_free(recheckIndexes);
2448                 }
2449         }
2450
2451         /*
2452          * There's no indexes, but see if we need to run AFTER ROW INSERT triggers
2453          * anyway.
2454          */
2455         else if (resultRelInfo->ri_TrigDesc != NULL &&
2456                          resultRelInfo->ri_TrigDesc->trig_insert_after_row)
2457         {
2458                 for (i = 0; i < nBufferedTuples; i++)
2459                 {
2460                         cstate->cur_lineno = firstBufferedLineNo + i;
2461                         ExecARInsertTriggers(estate, resultRelInfo,
2462                                                                  bufferedTuples[i],
2463                                                                  NIL);
2464                 }
2465         }
2466
2467         /* reset cur_lineno to where we were */
2468         cstate->cur_lineno = save_cur_lineno;
2469 }
2470
2471 /*
2472  * Setup to read tuples from a file for COPY FROM.
2473  *
2474  * 'rel': Used as a template for the tuples
2475  * 'filename': Name of server-local file to read
2476  * 'attnamelist': List of char *, columns to include. NIL selects all cols.
2477  * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
2478  *
2479  * Returns a CopyState, to be passed to NextCopyFrom and related functions.
2480  */
2481 CopyState
2482 BeginCopyFrom(Relation rel,
2483                           const char *filename,
2484                           bool is_program,
2485                           List *attnamelist,
2486                           List *options)
2487 {
2488         CopyState       cstate;
2489         bool            pipe = (filename == NULL);
2490         TupleDesc       tupDesc;
2491         Form_pg_attribute *attr;
2492         AttrNumber      num_phys_attrs,
2493                                 num_defaults;
2494         FmgrInfo   *in_functions;
2495         Oid                *typioparams;
2496         int                     attnum;
2497         Oid                     in_func_oid;
2498         int                *defmap;
2499         ExprState **defexprs;
2500         MemoryContext oldcontext;
2501         bool            volatile_defexprs;
2502
2503         cstate = BeginCopy(true, rel, NULL, NULL, attnamelist, options);
2504         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
2505
2506         /* Initialize state variables */
2507         cstate->fe_eof = false;
2508         cstate->eol_type = EOL_UNKNOWN;
2509         cstate->cur_relname = RelationGetRelationName(cstate->rel);
2510         cstate->cur_lineno = 0;
2511         cstate->cur_attname = NULL;
2512         cstate->cur_attval = NULL;
2513
2514         /* Set up variables to avoid per-attribute overhead. */
2515         initStringInfo(&cstate->attribute_buf);
2516         initStringInfo(&cstate->line_buf);
2517         cstate->line_buf_converted = false;
2518         cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
2519         cstate->raw_buf_index = cstate->raw_buf_len = 0;
2520
2521         tupDesc = RelationGetDescr(cstate->rel);
2522         attr = tupDesc->attrs;
2523         num_phys_attrs = tupDesc->natts;
2524         num_defaults = 0;
2525         volatile_defexprs = false;
2526
2527         /*
2528          * Pick up the required catalog information for each attribute in the
2529          * relation, including the input function, the element type (to pass to
2530          * the input function), and info about defaults and constraints. (Which
2531          * input function we use depends on text/binary format choice.)
2532          */
2533         in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
2534         typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
2535         defmap = (int *) palloc(num_phys_attrs * sizeof(int));
2536         defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
2537
2538         for (attnum = 1; attnum <= num_phys_attrs; attnum++)
2539         {
2540                 /* We don't need info for dropped attributes */
2541                 if (attr[attnum - 1]->attisdropped)
2542                         continue;
2543
2544                 /* Fetch the input function and typioparam info */
2545                 if (cstate->binary)
2546                         getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
2547                                                                    &in_func_oid, &typioparams[attnum - 1]);
2548                 else
2549                         getTypeInputInfo(attr[attnum - 1]->atttypid,
2550                                                          &in_func_oid, &typioparams[attnum - 1]);
2551                 fmgr_info(in_func_oid, &in_functions[attnum - 1]);
2552
2553                 /* Get default info if needed */
2554                 if (!list_member_int(cstate->attnumlist, attnum))
2555                 {
2556                         /* attribute is NOT to be copied from input */
2557                         /* use default value if one exists */
2558                         Expr       *defexpr = (Expr *) build_column_default(cstate->rel,
2559                                                                                                                                 attnum);
2560
2561                         if (defexpr != NULL)
2562                         {
2563                                 /* Run the expression through planner */
2564                                 defexpr = expression_planner(defexpr);
2565
2566                                 /* Initialize executable expression in copycontext */
2567                                 defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
2568                                 defmap[num_defaults] = attnum - 1;
2569                                 num_defaults++;
2570
2571                                 /*
2572                                  * If a default expression looks at the table being loaded,
2573                                  * then it could give the wrong answer when using
2574                                  * multi-insert. Since database access can be dynamic this is
2575                                  * hard to test for exactly, so we use the much wider test of
2576                                  * whether the default expression is volatile. We allow for
2577                                  * the special case of when the default expression is the
2578                                  * nextval() of a sequence which in this specific case is
2579                                  * known to be safe for use with the multi-insert
2580                                  * optimisation. Hence we use this special case function
2581                                  * checker rather than the standard check for
2582                                  * contain_volatile_functions().
2583                                  */
2584                                 if (!volatile_defexprs)
2585                                         volatile_defexprs = contain_volatile_functions_not_nextval((Node *) defexpr);
2586                         }
2587                 }
2588         }
2589
2590         /* We keep those variables in cstate. */
2591         cstate->in_functions = in_functions;
2592         cstate->typioparams = typioparams;
2593         cstate->defmap = defmap;
2594         cstate->defexprs = defexprs;
2595         cstate->volatile_defexprs = volatile_defexprs;
2596         cstate->num_defaults = num_defaults;
2597         cstate->is_program = is_program;
2598
2599         if (pipe)
2600         {
2601                 Assert(!is_program);    /* the grammar does not allow this */
2602                 if (whereToSendOutput == DestRemote)
2603                         ReceiveCopyBegin(cstate);
2604                 else
2605                         cstate->copy_file = stdin;
2606         }
2607         else
2608         {
2609                 cstate->filename = pstrdup(filename);
2610
2611                 if (cstate->is_program)
2612                 {
2613                         cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
2614                         if (cstate->copy_file == NULL)
2615                                 ereport(ERROR,
2616                                                 (errmsg("could not execute command \"%s\": %m",
2617                                                                 cstate->filename)));
2618                 }
2619                 else
2620                 {
2621                         struct stat st;
2622
2623                         cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
2624                         if (cstate->copy_file == NULL)
2625                                 ereport(ERROR,
2626                                                 (errcode_for_file_access(),
2627                                                  errmsg("could not open file \"%s\" for reading: %m",
2628                                                                 cstate->filename)));
2629
2630                         fstat(fileno(cstate->copy_file), &st);
2631                         if (S_ISDIR(st.st_mode))
2632                                 ereport(ERROR,
2633                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2634                                                  errmsg("\"%s\" is a directory", cstate->filename)));
2635                 }
2636         }
2637
2638         if (!cstate->binary)
2639         {
2640                 /* must rely on user to tell us... */
2641                 cstate->file_has_oids = cstate->oids;
2642         }
2643         else
2644         {
2645                 /* Read and verify binary header */
2646                 char            readSig[11];
2647                 int32           tmp;
2648
2649                 /* Signature */
2650                 if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
2651                         memcmp(readSig, BinarySignature, 11) != 0)
2652                         ereport(ERROR,
2653                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2654                                          errmsg("COPY file signature not recognized")));
2655                 /* Flags field */
2656                 if (!CopyGetInt32(cstate, &tmp))
2657                         ereport(ERROR,
2658                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2659                                          errmsg("invalid COPY file header (missing flags)")));
2660                 cstate->file_has_oids = (tmp & (1 << 16)) != 0;
2661                 tmp &= ~(1 << 16);
2662                 if ((tmp >> 16) != 0)
2663                         ereport(ERROR,
2664                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2665                                  errmsg("unrecognized critical flags in COPY file header")));
2666                 /* Header extension length */
2667                 if (!CopyGetInt32(cstate, &tmp) ||
2668                         tmp < 0)
2669                         ereport(ERROR,
2670                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2671                                          errmsg("invalid COPY file header (missing length)")));
2672                 /* Skip extension header, if present */
2673                 while (tmp-- > 0)
2674                 {
2675                         if (CopyGetData(cstate, readSig, 1, 1) != 1)
2676                                 ereport(ERROR,
2677                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2678                                                  errmsg("invalid COPY file header (wrong length)")));
2679                 }
2680         }
2681
2682         if (cstate->file_has_oids && cstate->binary)
2683         {
2684                 getTypeBinaryInputInfo(OIDOID,
2685                                                            &in_func_oid, &cstate->oid_typioparam);
2686                 fmgr_info(in_func_oid, &cstate->oid_in_function);
2687         }
2688
2689         /* create workspace for CopyReadAttributes results */
2690         if (!cstate->binary)
2691         {
2692                 AttrNumber      attr_count = list_length(cstate->attnumlist);
2693                 int                     nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
2694
2695                 cstate->max_fields = nfields;
2696                 cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
2697         }
2698
2699         MemoryContextSwitchTo(oldcontext);
2700
2701         return cstate;
2702 }
2703
2704 /*
2705  * Read raw fields in the next line for COPY FROM in text or csv mode.
2706  * Return false if no more lines.
2707  *
2708  * An internal temporary buffer is returned via 'fields'. It is valid until
2709  * the next call of the function. Since the function returns all raw fields
2710  * in the input file, 'nfields' could be different from the number of columns
2711  * in the relation.
2712  *
2713  * NOTE: force_not_null option are not applied to the returned fields.
2714  */
2715 bool
2716 NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
2717 {
2718         int                     fldct;
2719         bool            done;
2720
2721         /* only available for text or csv input */
2722         Assert(!cstate->binary);
2723
2724         /* on input just throw the header line away */
2725         if (cstate->cur_lineno == 0 && cstate->header_line)
2726         {
2727                 cstate->cur_lineno++;
2728                 if (CopyReadLine(cstate))
2729                         return false;           /* done */
2730         }
2731
2732         cstate->cur_lineno++;
2733
2734         /* Actually read the line into memory here */
2735         done = CopyReadLine(cstate);
2736
2737         /*
2738          * EOF at start of line means we're done.  If we see EOF after some
2739          * characters, we act as though it was newline followed by EOF, ie,
2740          * process the line and then exit loop on next iteration.
2741          */
2742         if (done && cstate->line_buf.len == 0)
2743                 return false;
2744
2745         /* Parse the line into de-escaped field values */
2746         if (cstate->csv_mode)
2747                 fldct = CopyReadAttributesCSV(cstate);
2748         else
2749                 fldct = CopyReadAttributesText(cstate);
2750
2751         *fields = cstate->raw_fields;
2752         *nfields = fldct;
2753         return true;
2754 }
2755
2756 /*
2757  * Read next tuple from file for COPY FROM. Return false if no more tuples.
2758  *
2759  * 'econtext' is used to evaluate the default expression for each column not
2760  * read from the file. It can be NULL when no default values are used, i.e.
2761  * when all columns are read from the file.
      * When defaults are present, the caller must already have switched into
      * econtext's per-tuple memory context; this is asserted below.
2762  *
2763  * 'values' and 'nulls' arrays must be the same length as columns of the
2764  * relation passed to BeginCopyFrom. This function fills the arrays.
      * Every entry is first reset to NULL, so a column that is neither read
      * from the input nor given a default is returned as NULL.
      *
2765  * Oid of the tuple is returned with 'tupleOid' separately.
      * It is stored only when the input carries OIDs, cstate->oids is set and
      * 'tupleOid' is not NULL; otherwise '*tupleOid' is left untouched.
      *
      * Text/CSV input is split by NextCopyFromRawFields() and each field is run
      * through the column's input function.  Binary input is read as an int16
      * field count (-1 being the end-of-data marker) followed by one binary
      * attribute per listed column.  Malformed input is reported through
      * ereport(ERROR) with ERRCODE_BAD_COPY_FILE_FORMAT.
2766  */
2767 bool
2768 NextCopyFrom(CopyState cstate, ExprContext *econtext,
2769                          Datum *values, bool *nulls, Oid *tupleOid)
2770 {
2771         TupleDesc       tupDesc;
2772         Form_pg_attribute *attr;
2773         AttrNumber      num_phys_attrs,
2774                                 attr_count,
2775                                 num_defaults = cstate->num_defaults;
2776         FmgrInfo   *in_functions = cstate->in_functions;
2777         Oid                *typioparams = cstate->typioparams;
2778         int                     i;
2779         int                     nfields;
2780         bool            isnull;
2781         bool            file_has_oids = cstate->file_has_oids;
2782         int                *defmap = cstate->defmap;
2783         ExprState **defexprs = cstate->defexprs;
2784
2785         tupDesc = RelationGetDescr(cstate->rel);
2786         attr = tupDesc->attrs;
2787         num_phys_attrs = tupDesc->natts;
2788         attr_count = list_length(cstate->attnumlist);
             /* fields expected per text line: listed columns plus an OID field, if any */
2789         nfields = file_has_oids ? (attr_count + 1) : attr_count;
2790
2791         /* Initialize all values for row to NULL */
2792         MemSet(values, 0, num_phys_attrs * sizeof(Datum));
2793         MemSet(nulls, true, num_phys_attrs * sizeof(bool));
2794
2795         if (!cstate->binary)
2796         {
2797                 char      **field_strings;
2798                 ListCell   *cur;
2799                 int                     fldct;
2800                 int                     fieldno;
2801                 char       *string;
2802
2803                 /* read raw fields in the next line */
2804                 if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
2805                         return false;
2806
2807                 /* check for overflowing fields */
2808                 if (nfields > 0 && fldct > nfields)
2809                         ereport(ERROR,
2810                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2811                                          errmsg("extra data after last expected column")));
2812
                     /* fieldno indexes the next unconsumed entry of field_strings */
2813                 fieldno = 0;
2814
2815                 /* Read the OID field if present */
2816                 if (file_has_oids)
2817                 {
2818                         if (fieldno >= fldct)
2819                                 ereport(ERROR,
2820                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2821                                                  errmsg("missing data for OID column")));
2822                         string = field_strings[fieldno++];
2823
2824                         if (string == NULL)
2825                                 ereport(ERROR,
2826                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2827                                                  errmsg("null OID in COPY data")));
2828                         else if (cstate->oids && tupleOid != NULL)
2829                         {
                                     /*
                                      * cur_attname/cur_attval are set only while the value is
                                      * being converted and are cleared again afterwards.
                                      * NOTE(review): presumably read by the COPY error-context
                                      * callback to name the offending column -- confirm.
                                      */
2830                                 cstate->cur_attname = "oid";
2831                                 cstate->cur_attval = string;
2832                                 *tupleOid = DatumGetObjectId(DirectFunctionCall1(oidin,
2833                                                                                                    CStringGetDatum(string)));
2834                                 if (*tupleOid == InvalidOid)
2835                                         ereport(ERROR,
2836                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2837                                                          errmsg("invalid OID in COPY data")));
2838                                 cstate->cur_attname = NULL;
2839                                 cstate->cur_attval = NULL;
2840                         }
2841                 }
2842
2843                 /* Loop to read the user attributes on the line. */
2844                 foreach(cur, cstate->attnumlist)
2845                 {
2846                         int                     attnum = lfirst_int(cur);
                             /* attnum is 1-based; m is the 0-based index into the per-column arrays */
2847                         int                     m = attnum - 1;
2848
2849                         if (fieldno >= fldct)
2850                                 ereport(ERROR,
2851                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2852                                                  errmsg("missing data for column \"%s\"",
2853                                                                 NameStr(attr[m]->attname))));
2854                         string = field_strings[fieldno++];
2855
2856                         if (cstate->convert_select_flags &&
2857                                 !cstate->convert_select_flags[m])
2858                         {
2859                                 /* ignore input field, leaving column as NULL */
2860                                 continue;
2861                         }
2862
2863                         if (cstate->csv_mode)
2864                         {
2865                                 if (string == NULL &&
2866                                         cstate->force_notnull_flags[m])
2867                                 {
2868                                         /*
2869                                          * FORCE_NOT_NULL option is set and column is NULL -
2870                                          * convert it to the NULL string.
2871                                          */
2872                                         string = cstate->null_print;
2873                                 }
2874                                 else if (string != NULL && cstate->force_null_flags[m]
2875                                                  && strcmp(string, cstate->null_print) == 0)
2876                                 {
2877                                         /*
2878                                          * FORCE_NULL option is set and column matches the NULL
2879                                          * string. It must have been quoted, or otherwise the
2880                                          * string would already have been set to NULL. Convert it
2881                                          * to NULL as specified.
2882                                          */
2883                                         string = NULL;
2884                                 }
2885                         }
2886
                             /*
                              * The input function is called even when string is NULL; the
                              * column is marked non-null only if there was an actual string.
                              */
2887                         cstate->cur_attname = NameStr(attr[m]->attname);
2888                         cstate->cur_attval = string;
2889                         values[m] = InputFunctionCall(&in_functions[m],
2890                                                                                   string,
2891                                                                                   typioparams[m],
2892                                                                                   attr[m]->atttypmod);
2893                         if (string != NULL)
2894                                 nulls[m] = false;
2895                         cstate->cur_attname = NULL;
2896                         cstate->cur_attval = NULL;
2897                 }
2898
                     /*
                      * The OID field (if any) and every loop iteration consume exactly
                      * one field, so all expected fields must have been used up here.
                      */
2899                 Assert(fieldno == nfields);
2900         }
2901         else
2902         {
2903                 /* binary */
2904                 int16           fld_count;
2905                 ListCell   *cur;
2906
2907                 cstate->cur_lineno++;
2908
2909                 if (!CopyGetInt16(cstate, &fld_count))
2910                 {
2911                         /* EOF detected (end of file, or protocol-level EOF) */
2912                         return false;
2913                 }
2914
2915                 if (fld_count == -1)
2916                 {
2917                         /*
2918                          * Received EOF marker.  In a V3-protocol copy, wait for the
2919                          * protocol-level EOF, and complain if it doesn't come
2920                          * immediately.  This ensures that we correctly handle CopyFail,
2921                          * if client chooses to send that now.
2922                          *
2923                          * Note that we MUST NOT try to read more data in an old-protocol
2924                          * copy, since there is no protocol-level EOF marker then.  We
2925                          * could go either way for copy from file, but choose to throw
2926                          * error if there's data after the EOF marker, for consistency
2927                          * with the new-protocol case.
2928                          */
2929                         char            dummy;
2930
2931                         if (cstate->copy_dest != COPY_OLD_FE &&
2932                                 CopyGetData(cstate, &dummy, 1, 1) > 0)
2933                                 ereport(ERROR,
2934                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2935                                                  errmsg("received copy data after EOF marker")));
2936                         return false;
2937                 }
2938
                     /*
                      * The per-tuple count is checked against attr_count rather than
                      * nfields: the OID read below is not included in it.
                      */
2939                 if (fld_count != attr_count)
2940                         ereport(ERROR,
2941                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2942                                          errmsg("row field count is %d, expected %d",
2943                                                         (int) fld_count, attr_count)));
2944
2945                 if (file_has_oids)
2946                 {
2947                         Oid                     loaded_oid;
2948
                             /*
                              * The OID is always read and validated when the file has one,
                              * using field number 0 and typmod -1; it is handed back to the
                              * caller only if OIDs were requested.
                              */
2949                         cstate->cur_attname = "oid";
2950                         loaded_oid =
2951                                 DatumGetObjectId(CopyReadBinaryAttribute(cstate,
2952                                                                                                                  0,
2953                                                                                                         &cstate->oid_in_function,
2954                                                                                                           cstate->oid_typioparam,
2955                                                                                                                  -1,
2956                                                                                                                  &isnull));
2957                         if (isnull || loaded_oid == InvalidOid)
2958                                 ereport(ERROR,
2959                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2960                                                  errmsg("invalid OID in COPY data")));
2961                         cstate->cur_attname = NULL;
2962                         if (cstate->oids && tupleOid != NULL)
2963                                 *tupleOid = loaded_oid;
2964                 }
2965
                     /* i is the 1-based field number passed to CopyReadBinaryAttribute */
2966                 i = 0;
2967                 foreach(cur, cstate->attnumlist)
2968                 {
2969                         int                     attnum = lfirst_int(cur);
2970                         int                     m = attnum - 1;
2971
2972                         cstate->cur_attname = NameStr(attr[m]->attname);
2973                         i++;
2974                         values[m] = CopyReadBinaryAttribute(cstate,
2975                                                                                                 i,
2976                                                                                                 &in_functions[m],
2977                                                                                                 typioparams[m],
2978                                                                                                 attr[m]->atttypmod,
2979                                                                                                 &nulls[m]);
2980                         cstate->cur_attname = NULL;
2981                 }
2982         }
2983
2984         /*
2985          * Now compute and insert any defaults available for the columns not
2986          * provided by the input data.  Anything not processed here or above will
2987          * remain NULL.
              *
              * defmap[i] is the index into values/nulls of the column that receives
              * the result of defexprs[i].
2988          */
2989         for (i = 0; i < num_defaults; i++)
2990         {
2991                 /*
2992                  * The caller must supply econtext and have switched into the
2993                  * per-tuple memory context in it.
2994                  */
2995                 Assert(econtext != NULL);
2996                 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
2997
2998                 values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
2999                                                                                  &nulls[defmap[i]], NULL);
3000         }
3001
3002         return true;
3003 }
3004
3005 /*
3006  * Clean up storage and release resources for COPY FROM.
3007  */
3008 void
3009 EndCopyFrom(CopyState cstate)
3010 {
3011         /* No COPY FROM related resources except memory. */
3012
3013         EndCopy(cstate);
3014 }
3015
3016 /*
3017  * Read the next input line and stash it in line_buf, with conversion to
3018  * server encoding.
3019  *
3020  * Result is true if read was terminated by EOF, false if terminated
3021  * by newline.  The terminating newline or EOF marker is not included
3022  * in the final value of line_buf.
3023  */
3024 static bool
3025 CopyReadLine(CopyState cstate)
3026 {
3027         bool            result;
3028
3029         resetStringInfo(&cstate->line_buf);
3030         cstate->line_buf_valid = true;
3031
3032         /* Mark that encoding conversion hasn't occurred yet */
3033         cstate->line_buf_converted = false;
3034
3035         /* Parse data and transfer into line_buf */
3036         result = CopyReadLineText(cstate);
3037
3038         if (result)
3039         {
3040                 /*
3041                  * Reached EOF.  In protocol version 3, we should ignore anything
3042                  * after \. up to the protocol end of copy data.  (XXX maybe better
3043                  * not to treat \. as special?)
3044                  */
3045                 if (cstate->copy_dest == COPY_NEW_FE)
3046                 {
3047                         do
3048                         {
3049                                 cstate->raw_buf_index = cstate->raw_buf_len;
3050                         } while (CopyLoadRawBuf(cstate));
3051                 }
3052         }
3053         else
3054         {
3055                 /*
3056                  * If we didn't hit EOF, then we must have transferred the EOL marker
3057                  * to line_buf along with the data.  Get rid of it.
3058                  */
3059                 switch (cstate->eol_type)
3060                 {
3061                         case EOL_NL:
3062                                 Assert(cstate->line_buf.len >= 1);
3063                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
3064                                 cstate->line_buf.len--;
3065                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
3066                                 break;
3067                         case EOL_CR:
3068                                 Assert(cstate->line_buf.len >= 1);
3069                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
3070                                 cstate->line_buf.len--;
3071                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
3072                                 break;
3073                         case EOL_CRNL:
3074                                 Assert(cstate->line_buf.len >= 2);
3075                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
3076                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
3077                                 cstate->line_buf.len -= 2;
3078                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
3079                                 break;
3080                         case EOL_UNKNOWN:
3081                                 /* shouldn't get here */
3082                                 Assert(false);
3083                                 break;
3084                 }
3085         }
3086
3087         /* Done reading the line.  Convert it to server encoding. */
3088         if (cstate->need_transcoding)
3089         {
3090                 char       *cvt;
3091
3092                 cvt = pg_any_to_server(cstate->line_buf.data,
3093                                                            cstate->line_buf.len,
3094                                                            cstate->file_encoding);
3095                 if (cvt != cstate->line_buf.data)
3096                 {
3097                         /* transfer converted data back to line_buf */
3098                         resetStringInfo(&cstate->line_buf);
3099                         appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
3100                         pfree(cvt);
3101                 }
3102         }
3103
3104         /* Now it's safe to use the buffer in error messages */
3105         cstate->line_buf_converted = true;
3106
3107         return result;
3108 }
3109
3110 /*
3111  * CopyReadLineText - inner loop of CopyReadLine for text mode
3112  */
3113 static bool
3114 CopyReadLineText(CopyState cstate)
3115 {
3116         char       *copy_raw_buf;
3117         int                     raw_buf_ptr;
3118         int                     copy_buf_len;
3119         bool            need_data = false;
3120         bool            hit_eof = false;
3121         bool            result = false;
3122         char            mblen_str[2];
3123
3124         /* CSV variables */
3125         bool            first_char_in_line = true;
3126         bool            in_quote = false,
3127                                 last_was_esc = false;
3128         char            quotec = '\0';
3129         char            escapec = '\0';
3130
3131         if (cstate->csv_mode)
3132         {
3133                 quotec = cstate->quote[0];
3134                 escapec = cstate->escape[0];
3135                 /* ignore special escape processing if it's the same as quotec */
3136                 if (quotec == escapec)
3137                         escapec = '\0';
3138         }
3139
3140         mblen_str[1] = '\0';
3141
3142         /*
3143          * The objective of this loop is to transfer the entire next input line
3144          * into line_buf.  Hence, we only care for detecting newlines (\r and/or
3145          * \n) and the end-of-copy marker (\.).
3146          *
3147          * In CSV mode, \r and \n inside a quoted field are just part of the data
3148          * value and are put in line_buf.  We keep just enough state to know if we
3149          * are currently in a quoted field or not.
3150          *
3151          * These four characters, and the CSV escape and quote characters, are
3152          * assumed the same in frontend and backend encodings.
3153          *
3154          * For speed, we try to move data from raw_buf to line_buf in chunks
3155          * rather than one character at a time.  raw_buf_ptr points to the next
3156          * character to examine; any characters from raw_buf_index to raw_buf_ptr
3157          * have been determined to be part of the line, but not yet transferred to
3158          * line_buf.
3159          *
3160          * For a little extra speed within the loop, we copy raw_buf and
3161          * raw_buf_len into local variables.
3162          */
3163         copy_raw_buf = cstate->raw_buf;
3164         raw_buf_ptr = cstate->raw_buf_index;
3165         copy_buf_len = cstate->raw_buf_len;
3166
3167         for (;;)
3168         {
3169                 int                     prev_raw_ptr;
3170                 char            c;
3171
3172                 /*
3173                  * Load more data if needed.  Ideally we would just force four bytes
3174                  * of read-ahead and avoid the many calls to
3175                  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
3176                  * does not allow us to read too far ahead or we might read into the
3177                  * next data, so we read-ahead only as far we know we can.  One
3178                  * optimization would be to read-ahead four byte here if
3179                  * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
3180                  * considering the size of the buffer.
3181                  */
3182                 if (raw_buf_ptr >= copy_buf_len || need_data)
3183                 {
3184                         REFILL_LINEBUF;
3185
3186                         /*
3187                          * Try to read some more data.  This will certainly reset
3188                          * raw_buf_index to zero, and raw_buf_ptr must go with it.
3189                          */
3190                         if (!CopyLoadRawBuf(cstate))
3191                                 hit_eof = true;
3192                         raw_buf_ptr = 0;
3193                         copy_buf_len = cstate->raw_buf_len;
3194
3195                         /*
3196                          * If we are completely out of data, break out of the loop,
3197                          * reporting EOF.
3198                          */
3199                         if (copy_buf_len <= 0)
3200                         {
3201                                 result = true;
3202                                 break;
3203                         }
3204                         need_data = false;
3205                 }
3206
3207                 /* OK to fetch a character */
3208                 prev_raw_ptr = raw_buf_ptr;
3209                 c = copy_raw_buf[raw_buf_ptr++];
3210
3211                 if (cstate->csv_mode)
3212                 {
3213                         /*
3214                          * If character is '\\' or '\r', we may need to look ahead below.
3215                          * Force fetch of the next character if we don't already have it.
3216                          * We need to do this before changing CSV state, in case one of
3217                          * these characters is also the quote or escape character.
3218                          *
3219                          * Note: old-protocol does not like forced prefetch, but it's OK
3220                          * here since we cannot validly be at EOF.
3221                          */
3222                         if (c == '\\' || c == '\r')
3223                         {
3224                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3225                         }
3226
3227                         /*
3228                          * Dealing with quotes and escapes here is mildly tricky. If the
3229                          * quote char is also the escape char, there's no problem - we
3230                          * just use the char as a toggle. If they are different, we need
3231                          * to ensure that we only take account of an escape inside a
3232                          * quoted field and immediately preceding a quote char, and not
3233                          * the second in a escape-escape sequence.
3234                          */
3235                         if (in_quote && c == escapec)
3236                                 last_was_esc = !last_was_esc;
3237                         if (c == quotec && !last_was_esc)
3238                                 in_quote = !in_quote;
3239                         if (c != escapec)
3240                                 last_was_esc = false;
3241
3242                         /*
3243                          * Updating the line count for embedded CR and/or LF chars is
3244                          * necessarily a little fragile - this test is probably about the
3245                          * best we can do.  (XXX it's arguable whether we should do this
3246                          * at all --- is cur_lineno a physical or logical count?)
3247                          */
3248                         if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
3249                                 cstate->cur_lineno++;
3250                 }
3251
3252                 /* Process \r */
3253                 if (c == '\r' && (!cstate->csv_mode || !in_quote))
3254                 {
3255                         /* Check for \r\n on first line, _and_ handle \r\n. */
3256                         if (cstate->eol_type == EOL_UNKNOWN ||
3257                                 cstate->eol_type == EOL_CRNL)
3258                         {
3259                                 /*
3260                                  * If need more data, go back to loop top to load it.
3261                                  *
3262                                  * Note that if we are at EOF, c will wind up as '\0' because
3263                                  * of the guaranteed pad of raw_buf.
3264                                  */
3265                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3266
3267                                 /* get next char */
3268                                 c = copy_raw_buf[raw_buf_ptr];
3269
3270                                 if (c == '\n')
3271                                 {
3272                                         raw_buf_ptr++;          /* eat newline */
3273                                         cstate->eol_type = EOL_CRNL;            /* in case not set yet */
3274                                 }
3275                                 else
3276                                 {
3277                                         /* found \r, but no \n */
3278                                         if (cstate->eol_type == EOL_CRNL)
3279                                                 ereport(ERROR,
3280                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3281                                                                  !cstate->csv_mode ?
3282                                                         errmsg("literal carriage return found in data") :
3283                                                         errmsg("unquoted carriage return found in data"),
3284                                                                  !cstate->csv_mode ?
3285                                                 errhint("Use \"\\r\" to represent carriage return.") :
3286                                                                  errhint("Use quoted CSV field to represent carriage return.")));
3287
3288                                         /*
3289                                          * if we got here, it is the first line and we didn't find
3290                                          * \n, so don't consume the peeked character
3291                                          */
3292                                         cstate->eol_type = EOL_CR;
3293                                 }
3294                         }
3295                         else if (cstate->eol_type == EOL_NL)
3296                                 ereport(ERROR,
3297                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3298                                                  !cstate->csv_mode ?
3299                                                  errmsg("literal carriage return found in data") :
3300                                                  errmsg("unquoted carriage return found in data"),
3301                                                  !cstate->csv_mode ?
3302                                            errhint("Use \"\\r\" to represent carriage return.") :
3303                                                  errhint("Use quoted CSV field to represent carriage return.")));
3304                         /* If reach here, we have found the line terminator */
3305                         break;
3306                 }
3307
3308                 /* Process \n */
3309                 if (c == '\n' && (!cstate->csv_mode || !in_quote))
3310                 {
3311                         if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
3312                                 ereport(ERROR,
3313                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3314                                                  !cstate->csv_mode ?
3315                                                  errmsg("literal newline found in data") :
3316                                                  errmsg("unquoted newline found in data"),
3317                                                  !cstate->csv_mode ?
3318                                                  errhint("Use \"\\n\" to represent newline.") :
3319                                          errhint("Use quoted CSV field to represent newline.")));
3320                         cstate->eol_type = EOL_NL;      /* in case not set yet */
3321                         /* If reach here, we have found the line terminator */
3322                         break;
3323                 }
3324
3325                 /*
3326                  * In CSV mode, we only recognize \. alone on a line.  This is because
3327                  * \. is a valid CSV data value.
3328                  */
3329                 if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
3330                 {
3331                         char            c2;
3332
3333                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3334                         IF_NEED_REFILL_AND_EOF_BREAK(0);
3335
3336                         /* -----
3337                          * get next character
3338                          * Note: we do not change c so if it isn't \., we can fall
3339                          * through and continue processing for file encoding.
3340                          * -----
3341                          */
3342                         c2 = copy_raw_buf[raw_buf_ptr];
3343
3344                         if (c2 == '.')
3345                         {
3346                                 raw_buf_ptr++;  /* consume the '.' */
3347
3348                                 /*
3349                                  * Note: if we loop back for more data here, it does not
3350                                  * matter that the CSV state change checks are re-executed; we
3351                                  * will come back here with no important state changed.
3352                                  */
3353                                 if (cstate->eol_type == EOL_CRNL)
3354                                 {
3355                                         /* Get the next character */
3356                                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3357                                         /* if hit_eof, c2 will become '\0' */
3358                                         c2 = copy_raw_buf[raw_buf_ptr++];
3359
3360                                         if (c2 == '\n')
3361                                         {
3362                                                 if (!cstate->csv_mode)
3363                                                         ereport(ERROR,
3364                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3365                                                                          errmsg("end-of-copy marker does not match previous newline style")));
3366                                                 else
3367                                                         NO_END_OF_COPY_GOTO;
3368                                         }
3369                                         else if (c2 != '\r')
3370                                         {
3371                                                 if (!cstate->csv_mode)
3372                                                         ereport(ERROR,
3373                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3374                                                                          errmsg("end-of-copy marker corrupt")));
3375                                                 else
3376                                                         NO_END_OF_COPY_GOTO;
3377                                         }
3378                                 }
3379
3380                                 /* Get the next character */
3381                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3382                                 /* if hit_eof, c2 will become '\0' */
3383                                 c2 = copy_raw_buf[raw_buf_ptr++];
3384
3385                                 if (c2 != '\r' && c2 != '\n')
3386                                 {
3387                                         if (!cstate->csv_mode)
3388                                                 ereport(ERROR,
3389                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3390                                                                  errmsg("end-of-copy marker corrupt")));
3391                                         else
3392                                                 NO_END_OF_COPY_GOTO;
3393                                 }
3394
3395                                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
3396                                         (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
3397                                         (cstate->eol_type == EOL_CR && c2 != '\r'))
3398                                 {
3399                                         ereport(ERROR,
3400                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3401                                                          errmsg("end-of-copy marker does not match previous newline style")));
3402                                 }
3403
3404                                 /*
3405                                  * Transfer only the data before the \. into line_buf, then
3406                                  * discard the data and the \. sequence.
3407                                  */
3408                                 if (prev_raw_ptr > cstate->raw_buf_index)
3409                                         appendBinaryStringInfo(&cstate->line_buf,
3410                                                                          cstate->raw_buf + cstate->raw_buf_index,
3411                                                                            prev_raw_ptr - cstate->raw_buf_index);
3412                                 cstate->raw_buf_index = raw_buf_ptr;
3413                                 result = true;  /* report EOF */
3414                                 break;
3415                         }
3416                         else if (!cstate->csv_mode)
3417
3418                                 /*
3419                                  * If we are here, it means we found a backslash followed by
3420                                  * something other than a period.  In non-CSV mode, anything
3421                                  * after a backslash is special, so we skip over that second
3422                                  * character too.  If we didn't do that \\. would be
3423                                  * considered an eof-of copy, while in non-CSV mode it is a
3424                                  * literal backslash followed by a period.  In CSV mode,
3425                                  * backslashes are not special, so we want to process the
3426                                  * character after the backslash just like a normal character,
3427                                  * so we don't increment in those cases.
3428                                  */
3429                                 raw_buf_ptr++;
3430                 }
3431
3432                 /*
3433                  * This label is for CSV cases where \. appears at the start of a
3434                  * line, but there is more text after it, meaning it was a data value.
3435                  * We are more strict for \. in CSV mode because \. could be a data
3436                  * value, while in non-CSV mode, \. cannot be a data value.
3437                  */
3438 not_end_of_copy:
3439
3440                 /*
3441                  * Process all bytes of a multi-byte character as a group.
3442                  *
3443                  * We only support multi-byte sequences where the first byte has the
3444                  * high-bit set, so as an optimization we can avoid this block
3445                  * entirely if it is not set.
3446                  */
3447                 if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
3448                 {
3449                         int                     mblen;
3450
3451                         mblen_str[0] = c;
3452                         /* All our encodings only read the first byte to get the length */
3453                         mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
3454                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
3455                         IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
3456                         raw_buf_ptr += mblen - 1;
3457                 }
3458                 first_char_in_line = false;
3459         }                                                       /* end of outer loop */
3460
3461         /*
3462          * Transfer any still-uncopied data to line_buf.
3463          */
3464         REFILL_LINEBUF;
3465
3466         return result;
3467 }
3468
/*
 * GetDecimalFromHex
 *		Map a single hexadecimal digit character to its numeric value.
 *
 * Decimal digits yield 0..9.  Anything else is folded to lower case and
 * measured as an offset from 'a', so 'a'/'A' gives 10, 'f'/'F' gives 15.
 * No validation is performed here; the result is only meaningful when the
 * argument really is a hex digit.
 */
static int
GetDecimalFromHex(char hex)
{
	unsigned char uc = (unsigned char) hex;

	return isdigit(uc) ? (hex - '0') : (tolower(uc) - 'a' + 10);
}
3480
3481 /*
3482  * Parse the current line into separate attributes (fields),
3483  * performing de-escaping as needed.
3484  *
3485  * The input is in line_buf.  We use attribute_buf to hold the result
3486  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
3487  * string, or NULL when the input matches the null marker string.
3488  * This array is expanded as necessary.
3489  *
3490  * (Note that the caller cannot check for nulls since the returned
3491  * string would be the post-de-escaping equivalent, which may look
3492  * the same as some valid data string.)
3493  *
3494  * delim is the column delimiter string (must be just one byte for now).
3495  * null_print is the null marker string.  Note that this is compared to
3496  * the pre-de-escaped input string.
3497  *
3498  * The return value is the number of fields actually read.
3499  */
3500 static int
3501 CopyReadAttributesText(CopyState cstate)
3502 {
3503         char            delimc = cstate->delim[0];
3504         int                     fieldno;
3505         char       *output_ptr;
3506         char       *cur_ptr;
3507         char       *line_end_ptr;
3508
3509         /*
3510          * We need a special case for zero-column tables: check that the input
3511          * line is empty, and return.
3512          */
3513         if (cstate->max_fields <= 0)
3514         {
3515                 if (cstate->line_buf.len != 0)
3516                         ereport(ERROR,
3517                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3518                                          errmsg("extra data after last expected column")));
3519                 return 0;
3520         }
3521
3522         resetStringInfo(&cstate->attribute_buf);
3523
3524         /*
3525          * The de-escaped attributes will certainly not be longer than the input
3526          * data line, so we can just force attribute_buf to be large enough and
3527          * then transfer data without any checks for enough space.  We need to do
3528          * it this way because enlarging attribute_buf mid-stream would invalidate
3529          * pointers already stored into cstate->raw_fields[].
3530          */
3531         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3532                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3533         output_ptr = cstate->attribute_buf.data;
3534
3535         /* set pointer variables for loop */
3536         cur_ptr = cstate->line_buf.data;
3537         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3538
3539         /* Outer loop iterates over fields */
3540         fieldno = 0;
3541         for (;;)
3542         {
3543                 bool            found_delim = false;
3544                 char       *start_ptr;
3545                 char       *end_ptr;
3546                 int                     input_len;
3547                 bool            saw_non_ascii = false;
3548
3549                 /* Make sure there is enough space for the next value */
3550                 if (fieldno >= cstate->max_fields)
3551                 {
3552                         cstate->max_fields *= 2;
3553                         cstate->raw_fields =
3554                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3555                 }
3556
3557                 /* Remember start of field on both input and output sides */
3558                 start_ptr = cur_ptr;
3559                 cstate->raw_fields[fieldno] = output_ptr;
3560
3561                 /*
3562                  * Scan data for field.
3563                  *
3564                  * Note that in this loop, we are scanning to locate the end of field
3565                  * and also speculatively performing de-escaping.  Once we find the
3566                  * end-of-field, we can match the raw field contents against the null
3567                  * marker string.  Only after that comparison fails do we know that
3568                  * de-escaping is actually the right thing to do; therefore we *must
3569                  * not* throw any syntax errors before we've done the null-marker
3570                  * check.
3571                  */
3572                 for (;;)
3573                 {
3574                         char            c;
3575
3576                         end_ptr = cur_ptr;
3577                         if (cur_ptr >= line_end_ptr)
3578                                 break;
3579                         c = *cur_ptr++;
3580                         if (c == delimc)
3581                         {
3582                                 found_delim = true;
3583                                 break;
3584                         }
3585                         if (c == '\\')
3586                         {
3587                                 if (cur_ptr >= line_end_ptr)
3588                                         break;
3589                                 c = *cur_ptr++;
3590                                 switch (c)
3591                                 {
3592                                         case '0':
3593                                         case '1':
3594                                         case '2':
3595                                         case '3':
3596                                         case '4':
3597                                         case '5':
3598                                         case '6':
3599                                         case '7':
3600                                                 {
3601                                                         /* handle \013 */
3602                                                         int                     val;
3603
3604                                                         val = OCTVALUE(c);
3605                                                         if (cur_ptr < line_end_ptr)
3606                                                         {
3607                                                                 c = *cur_ptr;
3608                                                                 if (ISOCTAL(c))
3609                                                                 {
3610                                                                         cur_ptr++;
3611                                                                         val = (val << 3) + OCTVALUE(c);
3612                                                                         if (cur_ptr < line_end_ptr)
3613                                                                         {
3614                                                                                 c = *cur_ptr;
3615                                                                                 if (ISOCTAL(c))
3616                                                                                 {
3617                                                                                         cur_ptr++;
3618                                                                                         val = (val << 3) + OCTVALUE(c);
3619                                                                                 }
3620                                                                         }
3621                                                                 }
3622                                                         }
3623                                                         c = val & 0377;
3624                                                         if (c == '\0' || IS_HIGHBIT_SET(c))
3625                                                                 saw_non_ascii = true;
3626                                                 }
3627                                                 break;
3628                                         case 'x':
3629                                                 /* Handle \x3F */
3630                                                 if (cur_ptr < line_end_ptr)
3631                                                 {
3632                                                         char            hexchar = *cur_ptr;
3633
3634                                                         if (isxdigit((unsigned char) hexchar))
3635                                                         {
3636                                                                 int                     val = GetDecimalFromHex(hexchar);
3637
3638                                                                 cur_ptr++;
3639                                                                 if (cur_ptr < line_end_ptr)
3640                                                                 {
3641                                                                         hexchar = *cur_ptr;
3642                                                                         if (isxdigit((unsigned char) hexchar))
3643                                                                         {
3644                                                                                 cur_ptr++;
3645                                                                                 val = (val << 4) + GetDecimalFromHex(hexchar);
3646                                                                         }
3647                                                                 }
3648                                                                 c = val & 0xff;
3649                                                                 if (c == '\0' || IS_HIGHBIT_SET(c))
3650                                                                         saw_non_ascii = true;
3651                                                         }
3652                                                 }
3653                                                 break;
3654                                         case 'b':
3655                                                 c = '\b';
3656                                                 break;
3657                                         case 'f':
3658                                                 c = '\f';
3659                                                 break;
3660                                         case 'n':
3661                                                 c = '\n';
3662                                                 break;
3663                                         case 'r':
3664                                                 c = '\r';
3665                                                 break;
3666                                         case 't':
3667                                                 c = '\t';
3668                                                 break;
3669                                         case 'v':
3670                                                 c = '\v';
3671                                                 break;
3672
3673                                                 /*
3674                                                  * in all other cases, take the char after '\'
3675                                                  * literally
3676                                                  */
3677                                 }
3678                         }
3679
3680                         /* Add c to output string */
3681                         *output_ptr++ = c;
3682                 }
3683
3684                 /* Check whether raw input matched null marker */
3685                 input_len = end_ptr - start_ptr;
3686                 if (input_len == cstate->null_print_len &&
3687                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3688                         cstate->raw_fields[fieldno] = NULL;
3689                 else
3690                 {
3691                         /*
3692                          * At this point we know the field is supposed to contain data.
3693                          *
3694                          * If we de-escaped any non-7-bit-ASCII chars, make sure the
3695                          * resulting string is valid data for the db encoding.
3696                          */
3697                         if (saw_non_ascii)
3698                         {
3699                                 char       *fld = cstate->raw_fields[fieldno];
3700
3701                                 pg_verifymbstr(fld, output_ptr - fld, false);
3702                         }
3703                 }
3704
3705                 /* Terminate attribute value in output area */
3706                 *output_ptr++ = '\0';
3707
3708                 fieldno++;
3709                 /* Done if we hit EOL instead of a delim */
3710                 if (!found_delim)
3711                         break;
3712         }
3713
3714         /* Clean up state of attribute_buf */
3715         output_ptr--;
3716         Assert(*output_ptr == '\0');
3717         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3718
3719         return fieldno;
3720 }
3721
3722 /*
3723  * Parse the current line into separate attributes (fields),
3724  * performing de-escaping as needed.  This has exactly the same API as
3725  * CopyReadAttributesText, except we parse the fields according to
3726  * "standard" (i.e. common) CSV usage.
3727  */
3728 static int
3729 CopyReadAttributesCSV(CopyState cstate)
3730 {
3731         char            delimc = cstate->delim[0];
3732         char            quotec = cstate->quote[0];
3733         char            escapec = cstate->escape[0];
3734         int                     fieldno;
3735         char       *output_ptr;
3736         char       *cur_ptr;
3737         char       *line_end_ptr;
3738
3739         /*
3740          * We need a special case for zero-column tables: check that the input
3741          * line is empty, and return.
3742          */
3743         if (cstate->max_fields <= 0)
3744         {
3745                 if (cstate->line_buf.len != 0)
3746                         ereport(ERROR,
3747                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3748                                          errmsg("extra data after last expected column")));
3749                 return 0;
3750         }
3751
3752         resetStringInfo(&cstate->attribute_buf);
3753
3754         /*
3755          * The de-escaped attributes will certainly not be longer than the input
3756          * data line, so we can just force attribute_buf to be large enough and
3757          * then transfer data without any checks for enough space.  We need to do
3758          * it this way because enlarging attribute_buf mid-stream would invalidate
3759          * pointers already stored into cstate->raw_fields[].
3760          */
3761         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3762                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3763         output_ptr = cstate->attribute_buf.data;
3764
3765         /* set pointer variables for loop */
3766         cur_ptr = cstate->line_buf.data;
3767         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3768
3769         /* Outer loop iterates over fields */
3770         fieldno = 0;
3771         for (;;)
3772         {
3773                 bool            found_delim = false;
3774                 bool            saw_quote = false;
3775                 char       *start_ptr;
3776                 char       *end_ptr;
3777                 int                     input_len;
3778
3779                 /* Make sure there is enough space for the next value */
3780                 if (fieldno >= cstate->max_fields)
3781                 {
3782                         cstate->max_fields *= 2;
3783                         cstate->raw_fields =
3784                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3785                 }
3786
3787                 /* Remember start of field on both input and output sides */
3788                 start_ptr = cur_ptr;
3789                 cstate->raw_fields[fieldno] = output_ptr;
3790
3791                 /*
3792                  * Scan data for field,
3793                  *
3794                  * The loop starts in "not quote" mode and then toggles between that
3795                  * and "in quote" mode. The loop exits normally if it is in "not
3796                  * quote" mode and a delimiter or line end is seen.
3797                  */
3798                 for (;;)
3799                 {
3800                         char            c;
3801
3802                         /* Not in quote */
3803                         for (;;)
3804                         {
3805                                 end_ptr = cur_ptr;
3806                                 if (cur_ptr >= line_end_ptr)
3807                                         goto endfield;
3808                                 c = *cur_ptr++;
3809                                 /* unquoted field delimiter */
3810                                 if (c == delimc)
3811                                 {
3812                                         found_delim = true;
3813                                         goto endfield;
3814                                 }
3815                                 /* start of quoted field (or part of field) */
3816                                 if (c == quotec)
3817                                 {
3818                                         saw_quote = true;
3819                                         break;
3820                                 }
3821                                 /* Add c to output string */
3822                                 *output_ptr++ = c;
3823                         }
3824
3825                         /* In quote */
3826                         for (;;)
3827                         {
3828                                 end_ptr = cur_ptr;
3829                                 if (cur_ptr >= line_end_ptr)
3830                                         ereport(ERROR,
3831                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3832                                                          errmsg("unterminated CSV quoted field")));
3833
3834                                 c = *cur_ptr++;
3835
3836                                 /* escape within a quoted field */
3837                                 if (c == escapec)
3838                                 {
3839                                         /*
3840                                          * peek at the next char if available, and escape it if it
3841                                          * is an escape char or a quote char
3842                                          */
3843                                         if (cur_ptr < line_end_ptr)
3844                                         {
3845                                                 char            nextc = *cur_ptr;
3846
3847                                                 if (nextc == escapec || nextc == quotec)
3848                                                 {
3849                                                         *output_ptr++ = nextc;
3850                                                         cur_ptr++;
3851                                                         continue;
3852                                                 }
3853                                         }
3854                                 }
3855
3856                                 /*
3857                                  * end of quoted field. Must do this test after testing for
3858                                  * escape in case quote char and escape char are the same
3859                                  * (which is the common case).
3860                                  */
3861                                 if (c == quotec)
3862                                         break;
3863
3864                                 /* Add c to output string */
3865                                 *output_ptr++ = c;
3866                         }
3867                 }
3868 endfield:
3869
3870                 /* Terminate attribute value in output area */
3871                 *output_ptr++ = '\0';
3872
3873                 /* Check whether raw input matched null marker */
3874                 input_len = end_ptr - start_ptr;
3875                 if (!saw_quote && input_len == cstate->null_print_len &&
3876                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3877                         cstate->raw_fields[fieldno] = NULL;
3878
3879                 fieldno++;
3880                 /* Done if we hit EOL instead of a delim */
3881                 if (!found_delim)
3882                         break;
3883         }
3884
3885         /* Clean up state of attribute_buf */
3886         output_ptr--;
3887         Assert(*output_ptr == '\0');
3888         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3889
3890         return fieldno;
3891 }
3892
3893
3894 /*
3895  * Read a binary attribute
3896  */
3897 static Datum
3898 CopyReadBinaryAttribute(CopyState cstate,
3899                                                 int column_no, FmgrInfo *flinfo,
3900                                                 Oid typioparam, int32 typmod,
3901                                                 bool *isnull)
3902 {
3903         int32           fld_size;
3904         Datum           result;
3905
3906         if (!CopyGetInt32(cstate, &fld_size))
3907                 ereport(ERROR,
3908                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3909                                  errmsg("unexpected EOF in COPY data")));
3910         if (fld_size == -1)
3911         {
3912                 *isnull = true;
3913                 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
3914         }
3915         if (fld_size < 0)
3916                 ereport(ERROR,
3917                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3918                                  errmsg("invalid field size")));
3919
3920         /* reset attribute_buf to empty, and load raw data in it */
3921         resetStringInfo(&cstate->attribute_buf);
3922
3923         enlargeStringInfo(&cstate->attribute_buf, fld_size);
3924         if (CopyGetData(cstate, cstate->attribute_buf.data,
3925                                         fld_size, fld_size) != fld_size)
3926                 ereport(ERROR,
3927                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3928                                  errmsg("unexpected EOF in COPY data")));
3929
3930         cstate->attribute_buf.len = fld_size;
3931         cstate->attribute_buf.data[fld_size] = '\0';
3932
3933         /* Call the column type's binary input converter */
3934         result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
3935                                                                  typioparam, typmod);
3936
3937         /* Trouble if it didn't eat the whole buffer */
3938         if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
3939                 ereport(ERROR,
3940                                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
3941                                  errmsg("incorrect binary data format")));
3942
3943         *isnull = false;
3944         return result;
3945 }
3946
3947 /*
3948  * Send text representation of one attribute, with conversion and escaping
3949  */
3950 #define DUMPSOFAR() \
3951         do { \
3952                 if (ptr > start) \
3953                         CopySendData(cstate, start, ptr - start); \
3954         } while (0)
3955
3956 static void
3957 CopyAttributeOutText(CopyState cstate, char *string)
3958 {
3959         char       *ptr;
3960         char       *start;
3961         char            c;
3962         char            delimc = cstate->delim[0];
3963
3964         if (cstate->need_transcoding)
3965                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
3966         else
3967                 ptr = string;
3968
3969         /*
3970          * We have to grovel through the string searching for control characters
3971          * and instances of the delimiter character.  In most cases, though, these
3972          * are infrequent.  To avoid overhead from calling CopySendData once per
3973          * character, we dump out all characters between escaped characters in a
3974          * single call.  The loop invariant is that the data from "start" to "ptr"
3975          * can be sent literally, but hasn't yet been.
3976          *
3977          * We can skip pg_encoding_mblen() overhead when encoding is safe, because
3978          * in valid backend encodings, extra bytes of a multibyte character never
3979          * look like ASCII.  This loop is sufficiently performance-critical that
3980          * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
3981          * of the normal safe-encoding path.
3982          */
3983         if (cstate->encoding_embeds_ascii)
3984         {
3985                 start = ptr;
3986                 while ((c = *ptr) != '\0')
3987                 {
3988                         if ((unsigned char) c < (unsigned char) 0x20)
3989                         {
3990                                 /*
3991                                  * \r and \n must be escaped, the others are traditional. We
3992                                  * prefer to dump these using the C-like notation, rather than
3993                                  * a backslash and the literal character, because it makes the
3994                                  * dump file a bit more proof against Microsoftish data
3995                                  * mangling.
3996                                  */
3997                                 switch (c)
3998                                 {
3999                                         case '\b':
4000                                                 c = 'b';
4001                                                 break;
4002                                         case '\f':
4003                                                 c = 'f';
4004                                                 break;
4005                                         case '\n':
4006                                                 c = 'n';
4007                                                 break;
4008                                         case '\r':
4009                                                 c = 'r';
4010                                                 break;
4011                                         case '\t':
4012                                                 c = 't';
4013                                                 break;
4014                                         case '\v':
4015                                                 c = 'v';
4016                                                 break;
4017                                         default:
4018                                                 /* If it's the delimiter, must backslash it */
4019                                                 if (c == delimc)
4020                                                         break;
4021                                                 /* All ASCII control chars are length 1 */
4022                                                 ptr++;
4023                                                 continue;               /* fall to end of loop */
4024                                 }
4025                                 /* if we get here, we need to convert the control char */
4026                                 DUMPSOFAR();
4027                                 CopySendChar(cstate, '\\');
4028                                 CopySendChar(cstate, c);
4029                                 start = ++ptr;  /* do not include char in next run */
4030                         }
4031                         else if (c == '\\' || c == delimc)
4032                         {
4033                                 DUMPSOFAR();
4034                                 CopySendChar(cstate, '\\');
4035                                 start = ptr++;  /* we include char in next run */
4036                         }
4037                         else if (IS_HIGHBIT_SET(c))
4038                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
4039                         else
4040                                 ptr++;
4041                 }
4042         }
4043         else
4044         {
4045                 start = ptr;
4046                 while ((c = *ptr) != '\0')
4047                 {
4048                         if ((unsigned char) c < (unsigned char) 0x20)
4049                         {
4050                                 /*
4051                                  * \r and \n must be escaped, the others are traditional. We
4052                                  * prefer to dump these using the C-like notation, rather than
4053                                  * a backslash and the literal character, because it makes the
4054                                  * dump file a bit more proof against Microsoftish data
4055                                  * mangling.
4056                                  */
4057                                 switch (c)
4058                                 {
4059                                         case '\b':
4060                                                 c = 'b';
4061                                                 break;
4062                                         case '\f':
4063                                                 c = 'f';
4064                                                 break;
4065                                         case '\n':
4066                                                 c = 'n';
4067                                                 break;
4068                                         case '\r':
4069                                                 c = 'r';
4070                                                 break;
4071                                         case '\t':
4072                                                 c = 't';
4073                                                 break;
4074                                         case '\v':
4075                                                 c = 'v';
4076                                                 break;
4077                                         default:
4078                                                 /* If it's the delimiter, must backslash it */
4079                                                 if (c == delimc)
4080                                                         break;
4081                                                 /* All ASCII control chars are length 1 */
4082                                                 ptr++;
4083                                                 continue;               /* fall to end of loop */
4084                                 }
4085                                 /* if we get here, we need to convert the control char */
4086                                 DUMPSOFAR();
4087                                 CopySendChar(cstate, '\\');
4088                                 CopySendChar(cstate, c);
4089                                 start = ++ptr;  /* do not include char in next run */
4090                         }
4091                         else if (c == '\\' || c == delimc)
4092                         {
4093                                 DUMPSOFAR();
4094                                 CopySendChar(cstate, '\\');
4095                                 start = ptr++;  /* we include char in next run */
4096                         }
4097                         else
4098                                 ptr++;
4099                 }
4100         }
4101
4102         DUMPSOFAR();
4103 }
4104
4105 /*
4106  * Send text representation of one attribute, with conversion and
4107  * CSV-style escaping
4108  */
4109 static void
4110 CopyAttributeOutCSV(CopyState cstate, char *string,
4111                                         bool use_quote, bool single_attr)
4112 {
4113         char       *ptr;
4114         char       *start;
4115         char            c;
4116         char            delimc = cstate->delim[0];
4117         char            quotec = cstate->quote[0];
4118         char            escapec = cstate->escape[0];
4119
4120         /* force quoting if it matches null_print (before conversion!) */
4121         if (!use_quote && strcmp(string, cstate->null_print) == 0)
4122                 use_quote = true;
4123
4124         if (cstate->need_transcoding)
4125                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
4126         else
4127                 ptr = string;
4128
4129         /*
4130          * Make a preliminary pass to discover if it needs quoting
4131          */
4132         if (!use_quote)
4133         {
4134                 /*
4135                  * Because '\.' can be a data value, quote it if it appears alone on a
4136                  * line so it is not interpreted as the end-of-data marker.
4137                  */
4138                 if (single_attr && strcmp(ptr, "\\.") == 0)
4139                         use_quote = true;
4140                 else
4141                 {
4142                         char       *tptr = ptr;
4143
4144                         while ((c = *tptr) != '\0')
4145                         {
4146                                 if (c == delimc || c == quotec || c == '\n' || c == '\r')
4147                                 {
4148                                         use_quote = true;
4149                                         break;
4150                                 }
4151                                 if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4152                                         tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
4153                                 else
4154                                         tptr++;
4155                         }
4156                 }
4157         }
4158
4159         if (use_quote)
4160         {
4161                 CopySendChar(cstate, quotec);
4162
4163                 /*
4164                  * We adopt the same optimization strategy as in CopyAttributeOutText
4165                  */
4166                 start = ptr;
4167                 while ((c = *ptr) != '\0')
4168                 {
4169                         if (c == quotec || c == escapec)
4170                         {
4171                                 DUMPSOFAR();
4172                                 CopySendChar(cstate, escapec);
4173                                 start = ptr;    /* we include char in next run */
4174                         }
4175                         if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4176                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
4177                         else
4178                                 ptr++;
4179                 }
4180                 DUMPSOFAR();
4181
4182                 CopySendChar(cstate, quotec);
4183         }
4184         else
4185         {
4186                 /* If it doesn't need quoting, we can just dump it as-is */
4187                 CopySendString(cstate, ptr);
4188         }
4189 }
4190
4191 /*
4192  * CopyGetAttnums - build an integer list of attnums to be copied
4193  *
4194  * The input attnamelist is either the user-specified column list,
4195  * or NIL if there was none (in which case we want all the non-dropped
4196  * columns).
4197  *
4198  * rel can be NULL ... it's only used for error reports.
4199  */
4200 static List *
4201 CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
4202 {
4203         List       *attnums = NIL;
4204
4205         if (attnamelist == NIL)
4206         {
4207                 /* Generate default column list */
4208                 Form_pg_attribute *attr = tupDesc->attrs;
4209                 int                     attr_count = tupDesc->natts;
4210                 int                     i;
4211
4212                 for (i = 0; i < attr_count; i++)
4213                 {
4214                         if (attr[i]->attisdropped)
4215                                 continue;
4216                         attnums = lappend_int(attnums, i + 1);
4217                 }
4218         }
4219         else
4220         {
4221                 /* Validate the user-supplied list and extract attnums */
4222                 ListCell   *l;
4223
4224                 foreach(l, attnamelist)
4225                 {
4226                         char       *name = strVal(lfirst(l));
4227                         int                     attnum;
4228                         int                     i;
4229
4230                         /* Lookup column name */
4231                         attnum = InvalidAttrNumber;
4232                         for (i = 0; i < tupDesc->natts; i++)
4233                         {
4234                                 if (tupDesc->attrs[i]->attisdropped)
4235                                         continue;
4236                                 if (namestrcmp(&(tupDesc->attrs[i]->attname), name) == 0)
4237                                 {
4238                                         attnum = tupDesc->attrs[i]->attnum;
4239                                         break;
4240                                 }
4241                         }
4242                         if (attnum == InvalidAttrNumber)
4243                         {
4244                                 if (rel != NULL)
4245                                         ereport(ERROR,
4246                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4247                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
4248                                                    name, RelationGetRelationName(rel))));
4249                                 else
4250                                         ereport(ERROR,
4251                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4252                                                          errmsg("column \"%s\" does not exist",
4253                                                                         name)));
4254                         }
4255                         /* Check for duplicates */
4256                         if (list_member_int(attnums, attnum))
4257                                 ereport(ERROR,
4258                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
4259                                                  errmsg("column \"%s\" specified more than once",
4260                                                                 name)));
4261                         attnums = lappend_int(attnums, attnum);
4262                 }
4263         }
4264
4265         return attnums;
4266 }
4267
4268
4269 /*
4270  * copy_dest_startup --- executor startup
4271  */
4272 static void
4273 copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
4274 {
4275         /* no-op */
4276 }
4277
4278 /*
4279  * copy_dest_receive --- receive one tuple
4280  */
4281 static void
4282 copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
4283 {
4284         DR_copy    *myState = (DR_copy *) self;
4285         CopyState       cstate = myState->cstate;
4286
4287         /* Make sure the tuple is fully deconstructed */
4288         slot_getallattrs(slot);
4289
4290         /* And send the data */
4291         CopyOneRowTo(cstate, InvalidOid, slot->tts_values, slot->tts_isnull);
4292         myState->processed++;
4293 }
4294
4295 /*
4296  * copy_dest_shutdown --- executor end
4297  */
4298 static void
4299 copy_dest_shutdown(DestReceiver *self)
4300 {
4301         /* no-op */
4302 }
4303
4304 /*
4305  * copy_dest_destroy --- release DestReceiver object
4306  */
4307 static void
4308 copy_dest_destroy(DestReceiver *self)
4309 {
4310         pfree(self);
4311 }
4312
4313 /*
4314  * CreateCopyDestReceiver -- create a suitable DestReceiver object
4315  */
4316 DestReceiver *
4317 CreateCopyDestReceiver(void)
4318 {
4319         DR_copy    *self = (DR_copy *) palloc(sizeof(DR_copy));
4320
4321         self->pub.receiveSlot = copy_dest_receive;
4322         self->pub.rStartup = copy_dest_startup;
4323         self->pub.rShutdown = copy_dest_shutdown;
4324         self->pub.rDestroy = copy_dest_destroy;
4325         self->pub.mydest = DestCopyOut;
4326
4327         self->cstate = NULL;            /* will be set later */
4328         self->processed = 0;
4329
4330         return (DestReceiver *) self;
4331 }