granicus.if.org Git - postgresql/blob - src/backend/commands/copy.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * copy.c
   4  *              Implements the COPY utility command
   5  *
   6  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/commands/copy.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18 #include <unistd.h>
  19 #include <sys/stat.h>
  20 #include <netinet/in.h>
  21 #include <arpa/inet.h>
  22
  23 #include "access/heapam.h"
  24 #include "access/htup_details.h"
  25 #include "access/sysattr.h"
  26 #include "access/xact.h"
  27 #include "catalog/namespace.h"
  28 #include "catalog/pg_type.h"
  29 #include "commands/copy.h"
  30 #include "commands/defrem.h"
  31 #include "commands/trigger.h"
  32 #include "executor/executor.h"
  33 #include "libpq/libpq.h"
  34 #include "libpq/pqformat.h"
  35 #include "mb/pg_wchar.h"
  36 #include "miscadmin.h"
  37 #include "optimizer/clauses.h"
  38 #include "optimizer/planner.h"
  39 #include "parser/parse_relation.h"
  40 #include "rewrite/rewriteHandler.h"
  41 #include "storage/fd.h"
  42 #include "tcop/tcopprot.h"
  43 #include "utils/acl.h"
  44 #include "utils/builtins.h"
  45 #include "utils/lsyscache.h"
  46 #include "utils/memutils.h"
  47 #include "utils/portal.h"
  48 #include "utils/rel.h"
  49 #include "utils/snapmgr.h"
  50
  51
  52 #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
  53 #define OCTVALUE(c) ((c) - '0')
  54
  55 /*
  56  * Represents the different source/dest cases we need to worry about at
  57  * the bottom level
  58  */
  59 typedef enum CopyDest
  60 {
  61         COPY_FILE,                                      /* to/from file (or a piped program) */
  62         COPY_OLD_FE,                            /* to/from frontend (2.0 protocol) */
  63         COPY_NEW_FE                                     /* to/from frontend (3.0 protocol) */
  64 } CopyDest;
  65
  66 /*
  67  *      Represents the end-of-line terminator type of the input
  68  */
  69 typedef enum EolType
  70 {
  71         EOL_UNKNOWN,
  72         EOL_NL,
  73         EOL_CR,
  74         EOL_CRNL
  75 } EolType;
  76
  77 /*
  78  * This struct contains all the state variables used throughout a COPY
  79  * operation. For simplicity, we use the same struct for all variants of COPY,
  80  * even though some fields are used in only some cases.
  81  *
  82  * Multi-byte encodings: all supported client-side encodings encode multi-byte
  83  * characters by having the first byte's high bit set. Subsequent bytes of the
  84  * character can have the high bit not set. When scanning data in such an
  85  * encoding to look for a match to a single-byte (ie ASCII) character, we must
  86  * use the full pg_encoding_mblen() machinery to skip over multibyte
  87  * characters, else we might find a false match to a trailing byte. In
  88  * supported server encodings, there is no possibility of a false match, and
  89  * it's faster to make useless comparisons to trailing bytes than it is to
  90  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
  91  * when we have to do it the hard way.
  92  */
  93 typedef struct CopyStateData
  94 {
  95         /* low-level state data */
  96         CopyDest        copy_dest;              /* type of copy source/destination */
  97         FILE       *copy_file;          /* used if copy_dest == COPY_FILE */
  98         StringInfo      fe_msgbuf;              /* used for all dests during COPY TO, only for
  99                                                                  * dest == COPY_NEW_FE in COPY FROM */
 100         bool            fe_eof;                 /* true if detected end of copy data */
 101         EolType         eol_type;               /* EOL type of input */
 102         int                     file_encoding;  /* file or remote side's character encoding */
 103         bool            need_transcoding;               /* file encoding diff from server? */
 104         bool            encoding_embeds_ascii;  /* ASCII can be non-first byte? */
 105
 106         /* parameters from the COPY command */
 107         Relation        rel;                    /* relation to copy to or from */
 108         QueryDesc  *queryDesc;          /* executable query to copy from */
 109         List       *attnumlist;         /* integer list of attnums to copy */
 110         char       *filename;           /* filename, or NULL for STDIN/STDOUT */
 111         bool            is_program;             /* is 'filename' a program to popen? */
 112         bool            binary;                 /* binary format? */
 113         bool            oids;                   /* include OIDs? */
 114         bool            freeze;                 /* freeze rows on loading? */
 115         bool            csv_mode;               /* Comma Separated Value format? */
 116         bool            header_line;    /* CSV header line? */
 117         char       *null_print;         /* NULL marker string (server encoding!) */
 118         int                     null_print_len; /* length of same */
 119         char       *null_print_client;          /* same converted to file encoding */
 120         char       *delim;                      /* column delimiter (must be 1 byte) */
 121         char       *quote;                      /* CSV quote char (must be 1 byte) */
 122         char       *escape;                     /* CSV escape char (must be 1 byte) */
 123         List       *force_quote;        /* list of column names */
 124         bool            force_quote_all;        /* FORCE QUOTE *? */
 125         bool       *force_quote_flags;          /* per-column CSV FQ flags */
 126         List       *force_notnull;      /* list of column names */
 127         bool       *force_notnull_flags;        /* per-column CSV FNN flags */
 128         bool            convert_selectively;    /* do selective binary conversion? */
 129         List       *convert_select; /* list of column names (can be NIL) */
 130         bool       *convert_select_flags;       /* per-column CSV/TEXT CS flags */
 131
 132         /* these are just for error messages, see CopyFromErrorCallback */
 133         const char *cur_relname;        /* table name for error messages */
 134         int                     cur_lineno;             /* line number for error messages */
 135         const char *cur_attname;        /* current att for error messages */
 136         const char *cur_attval;         /* current att value for error messages */
 137
 138         /*
 139          * Working state for COPY TO/FROM
 140          */
 141         MemoryContext copycontext;      /* per-copy execution context */
 142
 143         /*
 144          * Working state for COPY TO
 145          */
 146         FmgrInfo   *out_functions;      /* lookup info for output functions */
 147         MemoryContext rowcontext;       /* per-row evaluation context */
 148
 149         /*
 150          * Working state for COPY FROM
 151          */
 152         AttrNumber      num_defaults;
 153         bool            file_has_oids;
 154         FmgrInfo        oid_in_function;
 155         Oid                     oid_typioparam;
 156         FmgrInfo   *in_functions;       /* array of input functions for each attrs */
 157         Oid                *typioparams;        /* array of element types for in_functions */
 158         int                *defmap;                     /* array of default att numbers */
 159         ExprState **defexprs;           /* array of default att expressions */
 160         bool            volatile_defexprs;              /* is any of defexprs volatile? */
 161
 162         /*
 163          * These variables are used to reduce overhead in textual COPY FROM.
 164          *
 165          * attribute_buf holds the separated, de-escaped text for each field of
 166          * the current line.  The CopyReadAttributes functions return arrays of
 167          * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
 168          * the buffer on each cycle.
 169          */
 170         StringInfoData attribute_buf;
 171
 172         /* field raw data pointers found by COPY FROM */
 173
 174         int                     max_fields;
 175         char      **raw_fields;
 176
 177         /*
 178          * Similarly, line_buf holds the whole input line being processed. The
 179          * input cycle is first to read the whole line into line_buf, convert it
 180          * to server encoding there, and then extract the individual attribute
 181          * fields into attribute_buf.  line_buf is preserved unmodified so that we
 182          * can display it in error messages if appropriate.
 183          */
 184         StringInfoData line_buf;
 185         bool            line_buf_converted;             /* converted to server encoding? */
 186         bool            line_buf_valid; /* contains the row being processed? */
 187
 188         /*
 189          * Finally, raw_buf holds raw data read from the data source (file or
 190          * client connection).  CopyReadLine parses this data sufficiently to
 191          * locate line boundaries, then transfers the data to line_buf and
 192          * converts it.  Note: we guarantee that there is a \0 at
 193          * raw_buf[raw_buf_len].
 194          */
 195 #define RAW_BUF_SIZE 65536              /* we palloc RAW_BUF_SIZE+1 bytes */
 196         char       *raw_buf;
 197         int                     raw_buf_index;  /* next byte to process */
 198         int                     raw_buf_len;    /* total # of bytes stored */
 199 } CopyStateData;
 200
 201 /* DestReceiver for COPY (SELECT) TO */
 202 typedef struct
 203 {
 204         DestReceiver pub;                       /* publicly-known function pointers */
 205         CopyState       cstate;                 /* CopyStateData for the command */
 206         uint64          processed;              /* # of tuples processed */
 207 } DR_copy;
 208
 209
 210 /*
 211  * These macros centralize code used to process line_buf and raw_buf buffers.
 212  * They are macros because they often do continue/break control and to avoid
 213  * function call overhead in tight COPY loops.
 214  *
 215  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
 216  * prevent the continue/break processing from working.  We end the "if (1)"
 217  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
 218  * any "else" in the calling code, and to avoid any compiler warnings about
 219  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
 220  */
 221
 222 /*
 223  * This keeps the character read at the top of the loop in the buffer
 224  * even if there is more than one read-ahead.
 225  */
 226 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
 227 if (1) \
 228 { \
 229         if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
 230         { \
 231                 raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
 232                 need_data = true; \
 233                 continue; \
 234         } \
 235 } else ((void) 0)
 236
 237 /* This consumes the remainder of the buffer and breaks */
 238 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
 239 if (1) \
 240 { \
 241         if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
 242         { \
 243                 if (extralen) \
 244                         raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
 245                 /* backslash just before EOF, treat as data char */ \
 246                 result = true; \
 247                 break; \
 248         } \
 249 } else ((void) 0)
 250
 251 /*
 252  * Transfer any approved data to line_buf; must do this to be sure
 253  * there is some room in raw_buf.
 254  */
 255 #define REFILL_LINEBUF \
 256 if (1) \
 257 { \
 258         if (raw_buf_ptr > cstate->raw_buf_index) \
 259         { \
 260                 appendBinaryStringInfo(&cstate->line_buf, \
 261                                                          cstate->raw_buf + cstate->raw_buf_index, \
 262                                                            raw_buf_ptr - cstate->raw_buf_index); \
 263                 cstate->raw_buf_index = raw_buf_ptr; \
 264         } \
 265 } else ((void) 0)
 266
 267 /* Undo any read-ahead and jump out of the block. */
 268 #define NO_END_OF_COPY_GOTO \
 269 if (1) \
 270 { \
 271         raw_buf_ptr = prev_raw_ptr + 1; \
 272         goto not_end_of_copy; \
 273 } else ((void) 0)
 274
 275 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 276
 277
 278 /* non-export function prototypes */
 279 static CopyState BeginCopy(bool is_from, Relation rel, Node *raw_query,
 280                   const char *queryString, List *attnamelist, List *options);
 281 static void EndCopy(CopyState cstate);
 282 static void ClosePipeToProgram(CopyState cstate);
 283 static CopyState BeginCopyTo(Relation rel, Node *query, const char *queryString,
 284                         const char *filename, bool is_program, List *attnamelist,
 285                         List *options);
 286 static void EndCopyTo(CopyState cstate);
 287 static uint64 DoCopyTo(CopyState cstate);
 288 static uint64 CopyTo(CopyState cstate);
 289 static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
 290                          Datum *values, bool *nulls);
 291 static uint64 CopyFrom(CopyState cstate);
 292 static void CopyFromInsertBatch(CopyState cstate, EState *estate,
 293                                         CommandId mycid, int hi_options,
 294                                         ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
 295                                         BulkInsertState bistate,
 296                                         int nBufferedTuples, HeapTuple *bufferedTuples,
 297                                         int firstBufferedLineNo);
 298 static bool CopyReadLine(CopyState cstate);
 299 static bool CopyReadLineText(CopyState cstate);
 300 static int      CopyReadAttributesText(CopyState cstate);
 301 static int      CopyReadAttributesCSV(CopyState cstate);
 302 static Datum CopyReadBinaryAttribute(CopyState cstate,
 303                                                 int column_no, FmgrInfo *flinfo,
 304                                                 Oid typioparam, int32 typmod,
 305                                                 bool *isnull);
 306 static void CopyAttributeOutText(CopyState cstate, char *string);
 307 static void CopyAttributeOutCSV(CopyState cstate, char *string,
 308                                         bool use_quote, bool single_attr);
 309 static List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
 310                            List *attnamelist);
 311 static char *limit_printout_length(const char *str);
 312
 313 /* Low-level communications functions */
 314 static void SendCopyBegin(CopyState cstate);
 315 static void ReceiveCopyBegin(CopyState cstate);
 316 static void SendCopyEnd(CopyState cstate);
 317 static void CopySendData(CopyState cstate, const void *databuf, int datasize);
 318 static void CopySendString(CopyState cstate, const char *str);
 319 static void CopySendChar(CopyState cstate, char c);
 320 static void CopySendEndOfRow(CopyState cstate);
 321 static int CopyGetData(CopyState cstate, void *databuf,
 322                         int minread, int maxread);
 323 static void CopySendInt32(CopyState cstate, int32 val);
 324 static bool CopyGetInt32(CopyState cstate, int32 *val);
 325 static void CopySendInt16(CopyState cstate, int16 val);
 326 static bool CopyGetInt16(CopyState cstate, int16 *val);
 327
 328
 329 /*
 330  * Send copy start/stop messages for frontend copies.  These have changed
 331  * in past protocol redesigns.
 332  */
 333 static void
 334 SendCopyBegin(CopyState cstate)
 335 {
 336         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
 337         {
 338                 /* new way */
 339                 StringInfoData buf;
 340                 int                     natts = list_length(cstate->attnumlist);
 341                 int16           format = (cstate->binary ? 1 : 0);
 342                 int                     i;
 343
 344                 pq_beginmessage(&buf, 'H');
 345                 pq_sendbyte(&buf, format);              /* overall format */
 346                 pq_sendint(&buf, natts, 2);
 347                 for (i = 0; i < natts; i++)
 348                         pq_sendint(&buf, format, 2);            /* per-column formats */
 349                 pq_endmessage(&buf);
 350                 cstate->copy_dest = COPY_NEW_FE;
 351         }
 352         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 353         {
 354                 /* old way */
 355                 if (cstate->binary)
 356                         ereport(ERROR,
 357                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 358                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 359                 pq_putemptymessage('H');
 360                 /* grottiness needed for old COPY OUT protocol */
 361                 pq_startcopyout();
 362                 cstate->copy_dest = COPY_OLD_FE;
 363         }
 364         else
 365         {
 366                 /* very old way */
 367                 if (cstate->binary)
 368                         ereport(ERROR,
 369                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 370                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 371                 pq_putemptymessage('B');
 372                 /* grottiness needed for old COPY OUT protocol */
 373                 pq_startcopyout();
 374                 cstate->copy_dest = COPY_OLD_FE;
 375         }
 376 }
 377
 378 static void
 379 ReceiveCopyBegin(CopyState cstate)
 380 {
 381         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
 382         {
 383                 /* new way */
 384                 StringInfoData buf;
 385                 int                     natts = list_length(cstate->attnumlist);
 386                 int16           format = (cstate->binary ? 1 : 0);
 387                 int                     i;
 388
 389                 pq_beginmessage(&buf, 'G');
 390                 pq_sendbyte(&buf, format);              /* overall format */
 391                 pq_sendint(&buf, natts, 2);
 392                 for (i = 0; i < natts; i++)
 393                         pq_sendint(&buf, format, 2);            /* per-column formats */
 394                 pq_endmessage(&buf);
 395                 cstate->copy_dest = COPY_NEW_FE;
 396                 cstate->fe_msgbuf = makeStringInfo();
 397         }
 398         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 399         {
 400                 /* old way */
 401                 if (cstate->binary)
 402                         ereport(ERROR,
 403                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 404                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 405                 pq_putemptymessage('G');
 406                 cstate->copy_dest = COPY_OLD_FE;
 407         }
 408         else
 409         {
 410                 /* very old way */
 411                 if (cstate->binary)
 412                         ereport(ERROR,
 413                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 414                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 415                 pq_putemptymessage('D');
 416                 cstate->copy_dest = COPY_OLD_FE;
 417         }
 418         /* We *must* flush here to ensure FE knows it can send. */
 419         pq_flush();
 420 }
 421
 422 static void
 423 SendCopyEnd(CopyState cstate)
 424 {
 425         if (cstate->copy_dest == COPY_NEW_FE)
 426         {
 427                 /* Shouldn't have any unsent data */
 428                 Assert(cstate->fe_msgbuf->len == 0);
 429                 /* Send Copy Done message */
 430                 pq_putemptymessage('c');
 431         }
 432         else
 433         {
 434                 CopySendData(cstate, "\\.", 2);
 435                 /* Need to flush out the trailer (this also appends a newline) */
 436                 CopySendEndOfRow(cstate);
 437                 pq_endcopyout(false);
 438         }
 439 }
 440
 441 /*----------
 442  * CopySendData sends output data to the destination (file or frontend)
 443  * CopySendString does the same for null-terminated strings
 444  * CopySendChar does the same for single characters
 445  * CopySendEndOfRow does the appropriate thing at end of each data row
 446  *      (data is not actually flushed except by CopySendEndOfRow)
 447  *
 448  * NB: no data conversion is applied by these functions
 449  *----------
 450  */
 451 static void
 452 CopySendData(CopyState cstate, const void *databuf, int datasize)
 453 {
 454         appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
 455 }
 456
 457 static void
 458 CopySendString(CopyState cstate, const char *str)
 459 {
 460         appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
 461 }
 462
 463 static void
 464 CopySendChar(CopyState cstate, char c)
 465 {
 466         appendStringInfoCharMacro(cstate->fe_msgbuf, c);
 467 }
 468
 469 static void
 470 CopySendEndOfRow(CopyState cstate)
 471 {
 472         StringInfo      fe_msgbuf = cstate->fe_msgbuf;
 473
 474         switch (cstate->copy_dest)
 475         {
 476                 case COPY_FILE:
 477                         if (!cstate->binary)
 478                         {
 479                                 /* Default line termination depends on platform */
 480 #ifndef WIN32
 481                                 CopySendChar(cstate, '\n');
 482 #else
 483                                 CopySendString(cstate, "\r\n");
 484 #endif
 485                         }
 486
 487                         if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
 488                                            cstate->copy_file) != 1 ||
 489                                 ferror(cstate->copy_file))
 490                         {
 491                                 if (cstate->is_program)
 492                                 {
 493                                         if (errno == EPIPE)
 494                                         {
 495                                                 /*
 496                                                  * The pipe will be closed automatically on error at
 497                                                  * the end of transaction, but we might get a better
 498                                                  * error message from the subprocess' exit code than
 499                                                  * just "Broken Pipe"
 500                                                  */
 501                                                 ClosePipeToProgram(cstate);
 502
 503                                                 /*
 504                                                  * If ClosePipeToProgram() didn't throw an error, the
 505                                                  * program terminated normally, but closed the pipe
 506                                                  * first. Restore errno, and throw an error.
 507                                                  */
 508                                                 errno = EPIPE;
 509                                         }
 510                                         ereport(ERROR,
 511                                                         (errcode_for_file_access(),
 512                                                          errmsg("could not write to COPY program: %m")));
 513                                 }
 514                                 else
 515                                         ereport(ERROR,
 516                                                         (errcode_for_file_access(),
 517                                                          errmsg("could not write to COPY file: %m")));
 518                         }
 519                         break;
 520                 case COPY_OLD_FE:
 521                         /* The FE/BE protocol uses \n as newline for all platforms */
 522                         if (!cstate->binary)
 523                                 CopySendChar(cstate, '\n');
 524
 525                         if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
 526                         {
 527                                 /* no hope of recovering connection sync, so FATAL */
 528                                 ereport(FATAL,
 529                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 530                                                  errmsg("connection lost during COPY to stdout")));
 531                         }
 532                         break;
 533                 case COPY_NEW_FE:
 534                         /* The FE/BE protocol uses \n as newline for all platforms */
 535                         if (!cstate->binary)
 536                                 CopySendChar(cstate, '\n');
 537
 538                         /* Dump the accumulated row as one CopyData message */
 539                         (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
 540                         break;
 541         }
 542
 543         resetStringInfo(fe_msgbuf);
 544 }
 545
 546 /*
 547  * CopyGetData reads data from the source (file or frontend)
 548  *
 549  * We attempt to read at least minread, and at most maxread, bytes from
 550  * the source.  The actual number of bytes read is returned; if this is
 551  * less than minread, EOF was detected.
 552  *
 553  * Note: when copying from the frontend, we expect a proper EOF mark per
 554  * protocol; if the frontend simply drops the connection, we raise error.
 555  * It seems unwise to allow the COPY IN to complete normally in that case.
 556  *
 557  * NB: no data conversion is applied here.
 558  */
 559 static int
 560 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
 561 {
 562         int                     bytesread = 0;
 563
 564         switch (cstate->copy_dest)
 565         {
 566                 case COPY_FILE:
 567                         bytesread = fread(databuf, 1, maxread, cstate->copy_file);
 568                         if (ferror(cstate->copy_file))
 569                                 ereport(ERROR,
 570                                                 (errcode_for_file_access(),
 571                                                  errmsg("could not read from COPY file: %m")));
 572                         break;
 573                 case COPY_OLD_FE:
 574
 575                         /*
 576                          * We cannot read more than minread bytes (which in practice is 1)
 577                          * because old protocol doesn't have any clear way of separating
 578                          * the COPY stream from following data.  This is slow, but not any
 579                          * slower than the code path was originally, and we don't care
 580                          * much anymore about the performance of old protocol.
 581                          */
 582                         if (pq_getbytes((char *) databuf, minread))
 583                         {
 584                                 /* Only a \. terminator is legal EOF in old protocol */
 585                                 ereport(ERROR,
 586                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 587                                                  errmsg("unexpected EOF on client connection with an open transaction")));
 588                         }
 589                         bytesread = minread;
 590                         break;
 591                 case COPY_NEW_FE:
 592                         while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
 593                         {
 594                                 int                     avail;
 595
 596                                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
 597                                 {
 598                                         /* Try to receive another message */
 599                                         int                     mtype;
 600
 601                         readmessage:
 602                                         mtype = pq_getbyte();
 603                                         if (mtype == EOF)
 604                                                 ereport(ERROR,
 605                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 606                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
 607                                         if (pq_getmessage(cstate->fe_msgbuf, 0))
 608                                                 ereport(ERROR,
 609                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 610                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
 611                                         switch (mtype)
 612                                         {
 613                                                 case 'd':               /* CopyData */
 614                                                         break;
 615                                                 case 'c':               /* CopyDone */
 616                                                         /* COPY IN correctly terminated by frontend */
 617                                                         cstate->fe_eof = true;
 618                                                         return bytesread;
 619                                                 case 'f':               /* CopyFail */
 620                                                         ereport(ERROR,
 621                                                                         (errcode(ERRCODE_QUERY_CANCELED),
 622                                                                          errmsg("COPY from stdin failed: %s",
 623                                                                            pq_getmsgstring(cstate->fe_msgbuf))));
 624                                                         break;
 625                                                 case 'H':               /* Flush */
 626                                                 case 'S':               /* Sync */
 627
 628                                                         /*
 629                                                          * Ignore Flush/Sync for the convenience of client
 630                                                          * libraries (such as libpq) that may send those
 631                                                          * without noticing that the command they just
 632                                                          * sent was COPY.
 633                                                          */
 634                                                         goto readmessage;
 635                                                 default:
 636                                                         ereport(ERROR,
 637                                                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
 638                                                                          errmsg("unexpected message type 0x%02X during COPY from stdin",
 639                                                                                         mtype)));
 640                                                         break;
 641                                         }
 642                                 }
 643                                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
 644                                 if (avail > maxread)
 645                                         avail = maxread;
 646                                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
 647                                 databuf = (void *) ((char *) databuf + avail);
 648                                 maxread -= avail;
 649                                 bytesread += avail;
 650                         }
 651                         break;
 652         }
 653
 654         return bytesread;
 655 }
 656
 657
/*
 * The functions below convert integers between host and network byte
 * order as they send or receive them.
 */
 661
 662 /*
 663  * CopySendInt32 sends an int32 in network byte order
 664  */
 665 static void
 666 CopySendInt32(CopyState cstate, int32 val)
 667 {
 668         uint32          buf;
 669
 670         buf = htonl((uint32) val);
 671         CopySendData(cstate, &buf, sizeof(buf));
 672 }
 673
 674 /*
 675  * CopyGetInt32 reads an int32 that appears in network byte order
 676  *
 677  * Returns true if OK, false if EOF
 678  */
 679 static bool
 680 CopyGetInt32(CopyState cstate, int32 *val)
 681 {
 682         uint32          buf;
 683
 684         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
 685         {
 686                 *val = 0;                               /* suppress compiler warning */
 687                 return false;
 688         }
 689         *val = (int32) ntohl(buf);
 690         return true;
 691 }
 692
 693 /*
 694  * CopySendInt16 sends an int16 in network byte order
 695  */
 696 static void
 697 CopySendInt16(CopyState cstate, int16 val)
 698 {
 699         uint16          buf;
 700
 701         buf = htons((uint16) val);
 702         CopySendData(cstate, &buf, sizeof(buf));
 703 }
 704
 705 /*
 706  * CopyGetInt16 reads an int16 that appears in network byte order
 707  */
 708 static bool
 709 CopyGetInt16(CopyState cstate, int16 *val)
 710 {
 711         uint16          buf;
 712
 713         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
 714         {
 715                 *val = 0;                               /* suppress compiler warning */
 716                 return false;
 717         }
 718         *val = (int16) ntohs(buf);
 719         return true;
 720 }
 721
 722
 723 /*
 724  * CopyLoadRawBuf loads some more data into raw_buf
 725  *
 726  * Returns TRUE if able to obtain at least one more byte, else FALSE.
 727  *
 728  * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
 729  * down to the start of the buffer and then we load more data after that.
 730  * This case is used only when a frontend multibyte character crosses a
 731  * bufferload boundary.
 732  */
 733 static bool
 734 CopyLoadRawBuf(CopyState cstate)
 735 {
 736         int                     nbytes;
 737         int                     inbytes;
 738
 739         if (cstate->raw_buf_index < cstate->raw_buf_len)
 740         {
 741                 /* Copy down the unprocessed data */
 742                 nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
 743                 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
 744                                 nbytes);
 745         }
 746         else
 747                 nbytes = 0;                             /* no data need be saved */
 748
 749         inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
 750                                                   1, RAW_BUF_SIZE - nbytes);
 751         nbytes += inbytes;
 752         cstate->raw_buf[nbytes] = '\0';
 753         cstate->raw_buf_index = 0;
 754         cstate->raw_buf_len = nbytes;
 755         return (inbytes > 0);
 756 }
 757
 758
 759 /*
 760  *       DoCopy executes the SQL COPY statement
 761  *
 762  * Either unload or reload contents of table <relation>, depending on <from>.
 763  * (<from> = TRUE means we are inserting into the table.)  In the "TO" case
 764  * we also support copying the output of an arbitrary SELECT query.
 765  *
 766  * If <pipe> is false, transfer is between the table and the file named
 767  * <filename>.  Otherwise, transfer is between the table and our regular
 768  * input/output stream. The latter could be either stdin/stdout or a
 769  * socket, depending on whether we're running under Postmaster control.
 770  *
 771  * Do not allow a Postgres user without superuser privilege to read from
 772  * or write to a file.
 773  *
 774  * Do not allow the copy if user doesn't have proper permission to access
 775  * the table or the specifically requested columns.
 776  */
 777 Oid
 778 DoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed)
 779 {
 780         CopyState       cstate;
 781         bool            is_from = stmt->is_from;
 782         bool            pipe = (stmt->filename == NULL);
 783         Relation        rel;
 784         Oid                     relid;
 785
 786         /* Disallow COPY to/from file or program except to superusers. */
 787         if (!pipe && !superuser())
 788         {
 789                 if (stmt->is_program)
 790                         ereport(ERROR,
 791                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 792                                          errmsg("must be superuser to COPY to or from an external program"),
 793                                          errhint("Anyone can COPY to stdout or from stdin. "
 794                                                    "psql's \\copy command also works for anyone.")));
 795                 else
 796                         ereport(ERROR,
 797                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 798                                          errmsg("must be superuser to COPY to or from a file"),
 799                                          errhint("Anyone can COPY to stdout or from stdin. "
 800                                                    "psql's \\copy command also works for anyone.")));
 801         }
 802
 803         if (stmt->relation)
 804         {
 805                 TupleDesc       tupDesc;
 806                 AclMode         required_access = (is_from ? ACL_INSERT : ACL_SELECT);
 807                 RangeTblEntry *rte;
 808                 List       *attnums;
 809                 ListCell   *cur;
 810
 811                 Assert(!stmt->query);
 812
 813                 /* Open and lock the relation, using the appropriate lock type. */
 814                 rel = heap_openrv(stmt->relation,
 815                                                   (is_from ? RowExclusiveLock : AccessShareLock));
 816
 817                 relid = RelationGetRelid(rel);
 818
 819                 rte = makeNode(RangeTblEntry);
 820                 rte->rtekind = RTE_RELATION;
 821                 rte->relid = RelationGetRelid(rel);
 822                 rte->relkind = rel->rd_rel->relkind;
 823                 rte->requiredPerms = required_access;
 824
 825                 tupDesc = RelationGetDescr(rel);
 826                 attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
 827                 foreach(cur, attnums)
 828                 {
 829                         int                     attno = lfirst_int(cur) -
 830                         FirstLowInvalidHeapAttributeNumber;
 831
 832                         if (is_from)
 833                                 rte->modifiedCols = bms_add_member(rte->modifiedCols, attno);
 834                         else
 835                                 rte->selectedCols = bms_add_member(rte->selectedCols, attno);
 836                 }
 837                 ExecCheckRTPerms(list_make1(rte), true);
 838         }
 839         else
 840         {
 841                 Assert(stmt->query);
 842
 843                 relid = InvalidOid;
 844                 rel = NULL;
 845         }
 846
 847         if (is_from)
 848         {
 849                 Assert(rel);
 850
 851                 /* check read-only transaction */
 852                 if (XactReadOnly && !rel->rd_islocaltemp)
 853                         PreventCommandIfReadOnly("COPY FROM");
 854
 855                 cstate = BeginCopyFrom(rel, stmt->filename, stmt->is_program,
 856                                                            stmt->attlist, stmt->options);
 857                 *processed = CopyFrom(cstate);  /* copy from file to database */
 858                 EndCopyFrom(cstate);
 859         }
 860         else
 861         {
 862                 cstate = BeginCopyTo(rel, stmt->query, queryString,
 863                                                          stmt->filename, stmt->is_program,
 864                                                          stmt->attlist, stmt->options);
 865                 *processed = DoCopyTo(cstate);  /* copy from database to file */
 866                 EndCopyTo(cstate);
 867         }
 868
 869         /*
 870          * Close the relation. If reading, we can release the AccessShareLock we
 871          * got; if writing, we should hold the lock until end of transaction to
 872          * ensure that updates will be committed before lock is released.
 873          */
 874         if (rel != NULL)
 875                 heap_close(rel, (is_from ? NoLock : AccessShareLock));
 876
 877         return relid;
 878 }
 879
 880 /*
 881  * Process the statement option list for COPY.
 882  *
 883  * Scan the options list (a list of DefElem) and transpose the information
 884  * into cstate, applying appropriate error checking.
 885  *
 886  * cstate is assumed to be filled with zeroes initially.
 887  *
 888  * This is exported so that external users of the COPY API can sanity-check
 889  * a list of options.  In that usage, cstate should be passed as NULL
 890  * (since external users don't know sizeof(CopyStateData)) and the collected
 891  * data is just leaked until CurrentMemoryContext is reset.
 892  *
 893  * Note that additional checking, such as whether column names listed in FORCE
 894  * QUOTE actually exist, has to be applied later.  This just checks for
 895  * self-consistency of the options list.
 896  */
 897 void
 898 ProcessCopyOptions(CopyState cstate,
 899                                    bool is_from,
 900                                    List *options)
 901 {
 902         bool            format_specified = false;
 903         ListCell   *option;
 904
 905         /* Support external use for option sanity checking */
 906         if (cstate == NULL)
 907                 cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
 908
 909         cstate->file_encoding = -1;
 910
 911         /* Extract options from the statement node tree */
 912         foreach(option, options)
 913         {
 914                 DefElem    *defel = (DefElem *) lfirst(option);
 915
 916                 if (strcmp(defel->defname, "format") == 0)
 917                 {
 918                         char       *fmt = defGetString(defel);
 919
 920                         if (format_specified)
 921                                 ereport(ERROR,
 922                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 923                                                  errmsg("conflicting or redundant options")));
 924                         format_specified = true;
 925                         if (strcmp(fmt, "text") == 0)
 926                                  /* default format */ ;
 927                         else if (strcmp(fmt, "csv") == 0)
 928                                 cstate->csv_mode = true;
 929                         else if (strcmp(fmt, "binary") == 0)
 930                                 cstate->binary = true;
 931                         else
 932                                 ereport(ERROR,
 933                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 934                                                  errmsg("COPY format \"%s\" not recognized", fmt)));
 935                 }
 936                 else if (strcmp(defel->defname, "oids") == 0)
 937                 {
 938                         if (cstate->oids)
 939                                 ereport(ERROR,
 940                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 941                                                  errmsg("conflicting or redundant options")));
 942                         cstate->oids = defGetBoolean(defel);
 943                 }
 944                 else if (strcmp(defel->defname, "freeze") == 0)
 945                 {
 946                         if (cstate->freeze)
 947                                 ereport(ERROR,
 948                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 949                                                  errmsg("conflicting or redundant options")));
 950                         cstate->freeze = defGetBoolean(defel);
 951                 }
 952                 else if (strcmp(defel->defname, "delimiter") == 0)
 953                 {
 954                         if (cstate->delim)
 955                                 ereport(ERROR,
 956                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 957                                                  errmsg("conflicting or redundant options")));
 958                         cstate->delim = defGetString(defel);
 959                 }
 960                 else if (strcmp(defel->defname, "null") == 0)
 961                 {
 962                         if (cstate->null_print)
 963                                 ereport(ERROR,
 964                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 965                                                  errmsg("conflicting or redundant options")));
 966                         cstate->null_print = defGetString(defel);
 967                 }
 968                 else if (strcmp(defel->defname, "header") == 0)
 969                 {
 970                         if (cstate->header_line)
 971                                 ereport(ERROR,
 972                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 973                                                  errmsg("conflicting or redundant options")));
 974                         cstate->header_line = defGetBoolean(defel);
 975                 }
 976                 else if (strcmp(defel->defname, "quote") == 0)
 977                 {
 978                         if (cstate->quote)
 979                                 ereport(ERROR,
 980                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 981                                                  errmsg("conflicting or redundant options")));
 982                         cstate->quote = defGetString(defel);
 983                 }
 984                 else if (strcmp(defel->defname, "escape") == 0)
 985                 {
 986                         if (cstate->escape)
 987                                 ereport(ERROR,
 988                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 989                                                  errmsg("conflicting or redundant options")));
 990                         cstate->escape = defGetString(defel);
 991                 }
 992                 else if (strcmp(defel->defname, "force_quote") == 0)
 993                 {
 994                         if (cstate->force_quote || cstate->force_quote_all)
 995                                 ereport(ERROR,
 996                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 997                                                  errmsg("conflicting or redundant options")));
 998                         if (defel->arg && IsA(defel->arg, A_Star))
 999                                 cstate->force_quote_all = true;
1000                         else if (defel->arg && IsA(defel->arg, List))
1001                                 cstate->force_quote = (List *) defel->arg;
1002                         else
1003                                 ereport(ERROR,
1004                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1005                                                  errmsg("argument to option \"%s\" must be a list of column names",
1006                                                                 defel->defname)));
1007                 }
1008                 else if (strcmp(defel->defname, "force_not_null") == 0)
1009                 {
1010                         if (cstate->force_notnull)
1011                                 ereport(ERROR,
1012                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1013                                                  errmsg("conflicting or redundant options")));
1014                         if (defel->arg && IsA(defel->arg, List))
1015                                 cstate->force_notnull = (List *) defel->arg;
1016                         else
1017                                 ereport(ERROR,
1018                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1019                                                  errmsg("argument to option \"%s\" must be a list of column names",
1020                                                                 defel->defname)));
1021                 }
1022                 else if (strcmp(defel->defname, "convert_selectively") == 0)
1023                 {
1024                         /*
1025                          * Undocumented, not-accessible-from-SQL option: convert only the
1026                          * named columns to binary form, storing the rest as NULLs. It's
1027                          * allowed for the column list to be NIL.
1028                          */
1029                         if (cstate->convert_selectively)
1030                                 ereport(ERROR,
1031                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1032                                                  errmsg("conflicting or redundant options")));
1033                         cstate->convert_selectively = true;
1034                         if (defel->arg == NULL || IsA(defel->arg, List))
1035                                 cstate->convert_select = (List *) defel->arg;
1036                         else
1037                                 ereport(ERROR,
1038                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1039                                                  errmsg("argument to option \"%s\" must be a list of column names",
1040                                                                 defel->defname)));
1041                 }
1042                 else if (strcmp(defel->defname, "encoding") == 0)
1043                 {
1044                         if (cstate->file_encoding >= 0)
1045                                 ereport(ERROR,
1046                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1047                                                  errmsg("conflicting or redundant options")));
1048                         cstate->file_encoding = pg_char_to_encoding(defGetString(defel));
1049                         if (cstate->file_encoding < 0)
1050                                 ereport(ERROR,
1051                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1052                                                  errmsg("argument to option \"%s\" must be a valid encoding name",
1053                                                                 defel->defname)));
1054                 }
1055                 else
1056                         ereport(ERROR,
1057                                         (errcode(ERRCODE_SYNTAX_ERROR),
1058                                          errmsg("option \"%s\" not recognized",
1059                                                         defel->defname)));
1060         }
1061
1062         /*
1063          * Check for incompatible options (must do these two before inserting
1064          * defaults)
1065          */
1066         if (cstate->binary && cstate->delim)
1067                 ereport(ERROR,
1068                                 (errcode(ERRCODE_SYNTAX_ERROR),
1069                                  errmsg("cannot specify DELIMITER in BINARY mode")));
1070
1071         if (cstate->binary && cstate->null_print)
1072                 ereport(ERROR,
1073                                 (errcode(ERRCODE_SYNTAX_ERROR),
1074                                  errmsg("cannot specify NULL in BINARY mode")));
1075
1076         /* Set defaults for omitted options */
1077         if (!cstate->delim)
1078                 cstate->delim = cstate->csv_mode ? "," : "\t";
1079
1080         if (!cstate->null_print)
1081                 cstate->null_print = cstate->csv_mode ? "" : "\\N";
1082         cstate->null_print_len = strlen(cstate->null_print);
1083
1084         if (cstate->csv_mode)
1085         {
1086                 if (!cstate->quote)
1087                         cstate->quote = "\"";
1088                 if (!cstate->escape)
1089                         cstate->escape = cstate->quote;
1090         }
1091
1092         /* Only single-byte delimiter strings are supported. */
1093         if (strlen(cstate->delim) != 1)
1094                 ereport(ERROR,
1095                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1096                           errmsg("COPY delimiter must be a single one-byte character")));
1097
1098         /* Disallow end-of-line characters */
1099         if (strchr(cstate->delim, '\r') != NULL ||
1100                 strchr(cstate->delim, '\n') != NULL)
1101                 ereport(ERROR,
1102                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1103                          errmsg("COPY delimiter cannot be newline or carriage return")));
1104
1105         if (strchr(cstate->null_print, '\r') != NULL ||
1106                 strchr(cstate->null_print, '\n') != NULL)
1107                 ereport(ERROR,
1108                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1109                                  errmsg("COPY null representation cannot use newline or carriage return")));
1110
1111         /*
1112          * Disallow unsafe delimiter characters in non-CSV mode.  We can't allow
1113          * backslash because it would be ambiguous.  We can't allow the other
1114          * cases because data characters matching the delimiter must be
1115          * backslashed, and certain backslash combinations are interpreted
1116          * non-literally by COPY IN.  Disallowing all lower case ASCII letters is
1117          * more than strictly necessary, but seems best for consistency and
1118          * future-proofing.  Likewise we disallow all digits though only octal
1119          * digits are actually dangerous.
1120          */
1121         if (!cstate->csv_mode &&
1122                 strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
1123                            cstate->delim[0]) != NULL)
1124                 ereport(ERROR,
1125                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1126                                  errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
1127
1128         /* Check header */
1129         if (!cstate->csv_mode && cstate->header_line)
1130                 ereport(ERROR,
1131                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1132                                  errmsg("COPY HEADER available only in CSV mode")));
1133
1134         /* Check quote */
1135         if (!cstate->csv_mode && cstate->quote != NULL)
1136                 ereport(ERROR,
1137                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1138                                  errmsg("COPY quote available only in CSV mode")));
1139
1140         if (cstate->csv_mode && strlen(cstate->quote) != 1)
1141                 ereport(ERROR,
1142                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1143                                  errmsg("COPY quote must be a single one-byte character")));
1144
1145         if (cstate->csv_mode && cstate->delim[0] == cstate->quote[0])
1146                 ereport(ERROR,
1147                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1148                                  errmsg("COPY delimiter and quote must be different")));
1149
1150         /* Check escape */
1151         if (!cstate->csv_mode && cstate->escape != NULL)
1152                 ereport(ERROR,
1153                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1154                                  errmsg("COPY escape available only in CSV mode")));
1155
1156         if (cstate->csv_mode && strlen(cstate->escape) != 1)
1157                 ereport(ERROR,
1158                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1159                                  errmsg("COPY escape must be a single one-byte character")));
1160
1161         /* Check force_quote */
1162         if (!cstate->csv_mode && (cstate->force_quote || cstate->force_quote_all))
1163                 ereport(ERROR,
1164                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1165                                  errmsg("COPY force quote available only in CSV mode")));
1166         if ((cstate->force_quote || cstate->force_quote_all) && is_from)
1167                 ereport(ERROR,
1168                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1169                                  errmsg("COPY force quote only available using COPY TO")));
1170
1171         /* Check force_notnull */
1172         if (!cstate->csv_mode && cstate->force_notnull != NIL)
1173                 ereport(ERROR,
1174                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1175                                  errmsg("COPY force not null available only in CSV mode")));
1176         if (cstate->force_notnull != NIL && !is_from)
1177                 ereport(ERROR,
1178                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1179                           errmsg("COPY force not null only available using COPY FROM")));
1180
1181         /* Don't allow the delimiter to appear in the null string. */
1182         if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
1183                 ereport(ERROR,
1184                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1185                 errmsg("COPY delimiter must not appear in the NULL specification")));
1186
1187         /* Don't allow the CSV quote char to appear in the null string. */
1188         if (cstate->csv_mode &&
1189                 strchr(cstate->null_print, cstate->quote[0]) != NULL)
1190                 ereport(ERROR,
1191                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1192                                  errmsg("CSV quote character must not appear in the NULL specification")));
1193 }
1194
1195 /*
1196  * Common setup routines used by BeginCopyFrom and BeginCopyTo.
1197  *
1198  * Iff <binary>, unload or reload in the binary format, as opposed to the
1199  * more wasteful but more robust and portable text format.
1200  *
1201  * Iff <oids>, unload or reload the format that includes OID information.
1202  * On input, we accept OIDs whether or not the table has an OID column,
1203  * but silently drop them if it does not.  On output, we report an error
1204  * if the user asks for OIDs in a table that has none (not providing an
1205  * OID column might seem friendlier, but could seriously confuse programs).
1206  *
1207  * If in the text format, delimit columns with delimiter <delim> and print
1208  * NULL values as <null_print>.
1209  */
1210 static CopyState
1211 BeginCopy(bool is_from,
1212                   Relation rel,
1213                   Node *raw_query,
1214                   const char *queryString,
1215                   List *attnamelist,
1216                   List *options)
1217 {
1218         CopyState       cstate;
1219         TupleDesc       tupDesc;
1220         int                     num_phys_attrs;
1221         MemoryContext oldcontext;
1222
1223         /* Allocate workspace and zero all fields */
1224         cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
1225
1226         /*
1227          * We allocate everything used by a cstate in a new memory context. This
1228          * avoids memory leaks during repeated use of COPY in a query.
1229          */
1230         cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
1231                                                                                                 "COPY",
1232                                                                                                 ALLOCSET_DEFAULT_MINSIZE,
1233                                                                                                 ALLOCSET_DEFAULT_INITSIZE,
1234                                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
1235
1236         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1237
1238         /* Extract options from the statement node tree */
1239         ProcessCopyOptions(cstate, is_from, options);
1240
1241         /* Process the source/target relation or query */
1242         if (rel)
1243         {
1244                 Assert(!raw_query);
1245
1246                 cstate->rel = rel;
1247
1248                 tupDesc = RelationGetDescr(cstate->rel);
1249
1250                 /* Don't allow COPY w/ OIDs to or from a table without them */
1251                 if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
1252                         ereport(ERROR,
1253                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
1254                                          errmsg("table \"%s\" does not have OIDs",
1255                                                         RelationGetRelationName(cstate->rel))));
1256         }
1257         else
1258         {
1259                 List       *rewritten;
1260                 Query      *query;
1261                 PlannedStmt *plan;
1262                 DestReceiver *dest;
1263
1264                 Assert(!is_from);
1265                 cstate->rel = NULL;
1266
1267                 /* Don't allow COPY w/ OIDs from a select */
1268                 if (cstate->oids)
1269                         ereport(ERROR,
1270                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1271                                          errmsg("COPY (SELECT) WITH OIDS is not supported")));
1272
1273                 /*
1274                  * Run parse analysis and rewrite.      Note this also acquires sufficient
1275                  * locks on the source table(s).
1276                  *
1277                  * Because the parser and planner tend to scribble on their input, we
1278                  * make a preliminary copy of the source querytree.  This prevents
1279                  * problems in the case that the COPY is in a portal or plpgsql
1280                  * function and is executed repeatedly.  (See also the same hack in
1281                  * DECLARE CURSOR and PREPARE.)  XXX FIXME someday.
1282                  */
1283                 rewritten = pg_analyze_and_rewrite((Node *) copyObject(raw_query),
1284                                                                                    queryString, NULL, 0);
1285
1286                 /* We don't expect more or less than one result query */
1287                 if (list_length(rewritten) != 1)
1288                         elog(ERROR, "unexpected rewrite result");
1289
1290                 query = (Query *) linitial(rewritten);
1291
1292                 /* The grammar allows SELECT INTO, but we don't support that */
1293                 if (query->utilityStmt != NULL &&
1294                         IsA(query->utilityStmt, CreateTableAsStmt))
1295                         ereport(ERROR,
1296                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1297                                          errmsg("COPY (SELECT INTO) is not supported")));
1298
1299                 Assert(query->commandType == CMD_SELECT);
1300                 Assert(query->utilityStmt == NULL);
1301
1302                 /* plan the query */
1303                 plan = planner(query, 0, NULL);
1304
1305                 /*
1306                  * Use a snapshot with an updated command ID to ensure this query sees
1307                  * results of any previously executed queries.
1308                  */
1309                 PushCopiedSnapshot(GetActiveSnapshot());
1310                 UpdateActiveSnapshotCommandId();
1311
1312                 /* Create dest receiver for COPY OUT */
1313                 dest = CreateDestReceiver(DestCopyOut);
1314                 ((DR_copy *) dest)->cstate = cstate;
1315
1316                 /* Create a QueryDesc requesting no output */
1317                 cstate->queryDesc = CreateQueryDesc(plan, queryString,
1318                                                                                         GetActiveSnapshot(),
1319                                                                                         InvalidSnapshot,
1320                                                                                         dest, NULL, 0);
1321
1322                 /*
1323                  * Call ExecutorStart to prepare the plan for execution.
1324                  *
1325                  * ExecutorStart computes a result tupdesc for us
1326                  */
1327                 ExecutorStart(cstate->queryDesc, 0);
1328
1329                 tupDesc = cstate->queryDesc->tupDesc;
1330         }
1331
1332         /* Generate or convert list of attributes to process */
1333         cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1334
1335         num_phys_attrs = tupDesc->natts;
1336
1337         /* Convert FORCE QUOTE name list to per-column flags, check validity */
1338         cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1339         if (cstate->force_quote_all)
1340         {
1341                 int                     i;
1342
1343                 for (i = 0; i < num_phys_attrs; i++)
1344                         cstate->force_quote_flags[i] = true;
1345         }
1346         else if (cstate->force_quote)
1347         {
1348                 List       *attnums;
1349                 ListCell   *cur;
1350
1351                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_quote);
1352
1353                 foreach(cur, attnums)
1354                 {
1355                         int                     attnum = lfirst_int(cur);
1356
1357                         if (!list_member_int(cstate->attnumlist, attnum))
1358                                 ereport(ERROR,
1359                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1360                                    errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
1361                                                   NameStr(tupDesc->attrs[attnum - 1]->attname))));
1362                         cstate->force_quote_flags[attnum - 1] = true;
1363                 }
1364         }
1365
1366         /* Convert FORCE NOT NULL name list to per-column flags, check validity */
1367         cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1368         if (cstate->force_notnull)
1369         {
1370                 List       *attnums;
1371                 ListCell   *cur;
1372
1373                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_notnull);
1374
1375                 foreach(cur, attnums)
1376                 {
1377                         int                     attnum = lfirst_int(cur);
1378
1379                         if (!list_member_int(cstate->attnumlist, attnum))
1380                                 ereport(ERROR,
1381                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1382                                 errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
1383                                            NameStr(tupDesc->attrs[attnum - 1]->attname))));
1384                         cstate->force_notnull_flags[attnum - 1] = true;
1385                 }
1386         }
1387
1388         /* Convert convert_selectively name list to per-column flags */
1389         if (cstate->convert_selectively)
1390         {
1391                 List       *attnums;
1392                 ListCell   *cur;
1393
1394                 cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1395
1396                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->convert_select);
1397
1398                 foreach(cur, attnums)
1399                 {
1400                         int                     attnum = lfirst_int(cur);
1401
1402                         if (!list_member_int(cstate->attnumlist, attnum))
1403                                 ereport(ERROR,
1404                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1405                                                  errmsg_internal("selected column \"%s\" not referenced by COPY",
1406                                                          NameStr(tupDesc->attrs[attnum - 1]->attname))));
1407                         cstate->convert_select_flags[attnum - 1] = true;
1408                 }
1409         }
1410
1411         /* Use client encoding when ENCODING option is not specified. */
1412         if (cstate->file_encoding < 0)
1413                 cstate->file_encoding = pg_get_client_encoding();
1414
1415         /*
1416          * Set up encoding conversion info.  Even if the file and server encodings
1417          * are the same, we must apply pg_any_to_server() to validate data in
1418          * multibyte encodings.
1419          */
1420         cstate->need_transcoding =
1421                 (cstate->file_encoding != GetDatabaseEncoding() ||
1422                  pg_database_encoding_max_length() > 1);
1423         /* See Multibyte encoding comment above */
1424         cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
1425
1426         cstate->copy_dest = COPY_FILE;          /* default */
1427
1428         MemoryContextSwitchTo(oldcontext);
1429
1430         return cstate;
1431 }
1432
1433 /*
1434  * Closes the pipe to an external program, checking the pclose() return code.
1435  */
1436 static void
1437 ClosePipeToProgram(CopyState cstate)
1438 {
1439         int                     pclose_rc;
1440
1441         Assert(cstate->is_program);
1442
1443         pclose_rc = ClosePipeStream(cstate->copy_file);
1444         if (pclose_rc == -1)
1445                 ereport(ERROR,
1446                                 (errmsg("could not close pipe to external command: %m")));
1447         else if (pclose_rc != 0)
1448                 ereport(ERROR,
1449                                 (errmsg("program \"%s\" failed",
1450                                                 cstate->filename),
1451                                  errdetail_internal("%s", wait_result_to_str(pclose_rc))));
1452 }
1453
1454 /*
1455  * Release resources allocated in a cstate for COPY TO/FROM.
1456  */
1457 static void
1458 EndCopy(CopyState cstate)
1459 {
1460         if (cstate->is_program)
1461         {
1462                 ClosePipeToProgram(cstate);
1463         }
1464         else
1465         {
1466                 if (cstate->filename != NULL && FreeFile(cstate->copy_file))
1467                         ereport(ERROR,
1468                                         (errcode_for_file_access(),
1469                                          errmsg("could not close file \"%s\": %m",
1470                                                         cstate->filename)));
1471         }
1472
1473         MemoryContextDelete(cstate->copycontext);
1474         pfree(cstate);
1475 }
1476
1477 /*
1478  * Setup CopyState to read tuples from a table or a query for COPY TO.
1479  */
1480 static CopyState
1481 BeginCopyTo(Relation rel,
1482                         Node *query,
1483                         const char *queryString,
1484                         const char *filename,
1485                         bool is_program,
1486                         List *attnamelist,
1487                         List *options)
1488 {
1489         CopyState       cstate;
1490         bool            pipe = (filename == NULL);
1491         MemoryContext oldcontext;
1492
1493         if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
1494         {
1495                 if (rel->rd_rel->relkind == RELKIND_VIEW)
1496                         ereport(ERROR,
1497                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1498                                          errmsg("cannot copy from view \"%s\"",
1499                                                         RelationGetRelationName(rel)),
1500                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1501                 else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
1502                         ereport(ERROR,
1503                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1504                                          errmsg("cannot copy from materialized view \"%s\"",
1505                                                         RelationGetRelationName(rel)),
1506                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1507                 else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1508                         ereport(ERROR,
1509                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1510                                          errmsg("cannot copy from foreign table \"%s\"",
1511                                                         RelationGetRelationName(rel)),
1512                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1513                 else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
1514                         ereport(ERROR,
1515                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1516                                          errmsg("cannot copy from sequence \"%s\"",
1517                                                         RelationGetRelationName(rel))));
1518                 else
1519                         ereport(ERROR,
1520                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1521                                          errmsg("cannot copy from non-table relation \"%s\"",
1522                                                         RelationGetRelationName(rel))));
1523         }
1524
1525         cstate = BeginCopy(false, rel, query, queryString, attnamelist, options);
1526         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1527
1528         if (pipe)
1529         {
1530                 Assert(!is_program);    /* the grammar does not allow this */
1531                 if (whereToSendOutput != DestRemote)
1532                         cstate->copy_file = stdout;
1533         }
1534         else
1535         {
1536                 cstate->filename = pstrdup(filename);
1537                 cstate->is_program = is_program;
1538
1539                 if (is_program)
1540                 {
1541                         cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
1542                         if (cstate->copy_file == NULL)
1543                                 ereport(ERROR,
1544                                                 (errmsg("could not execute command \"%s\": %m",
1545                                                                 cstate->filename)));
1546                 }
1547                 else
1548                 {
1549                         mode_t          oumask; /* Pre-existing umask value */
1550                         struct stat st;
1551
1552                         /*
1553                          * Prevent write to relative path ... too easy to shoot oneself in
1554                          * the foot by overwriting a database file ...
1555                          */
1556                         if (!is_absolute_path(filename))
1557                                 ereport(ERROR,
1558                                                 (errcode(ERRCODE_INVALID_NAME),
1559                                           errmsg("relative path not allowed for COPY to file")));
1560
1561                         oumask = umask(S_IWGRP | S_IWOTH);
1562                         cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1563                         umask(oumask);
1564                         if (cstate->copy_file == NULL)
1565                                 ereport(ERROR,
1566                                                 (errcode_for_file_access(),
1567                                                  errmsg("could not open file \"%s\" for writing: %m",
1568                                                                 cstate->filename)));
1569
1570                         fstat(fileno(cstate->copy_file), &st);
1571                         if (S_ISDIR(st.st_mode))
1572                                 ereport(ERROR,
1573                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1574                                                  errmsg("\"%s\" is a directory", cstate->filename)));
1575                 }
1576         }
1577
1578         MemoryContextSwitchTo(oldcontext);
1579
1580         return cstate;
1581 }
1582
1583 /*
1584  * This intermediate routine exists mainly to localize the effects of setjmp
1585  * so we don't need to plaster a lot of variables with "volatile".
1586  */
1587 static uint64
1588 DoCopyTo(CopyState cstate)
1589 {
1590         bool            pipe = (cstate->filename == NULL);
1591         bool            fe_copy = (pipe && whereToSendOutput == DestRemote);
1592         uint64          processed;
1593
1594         PG_TRY();
1595         {
1596                 if (fe_copy)
1597                         SendCopyBegin(cstate);
1598
1599                 processed = CopyTo(cstate);
1600
1601                 if (fe_copy)
1602                         SendCopyEnd(cstate);
1603         }
1604         PG_CATCH();
1605         {
1606                 /*
1607                  * Make sure we turn off old-style COPY OUT mode upon error. It is
1608                  * okay to do this in all cases, since it does nothing if the mode is
1609                  * not on.
1610                  */
1611                 pq_endcopyout(true);
1612                 PG_RE_THROW();
1613         }
1614         PG_END_TRY();
1615
1616         return processed;
1617 }
1618
1619 /*
1620  * Clean up storage and release resources for COPY TO.
1621  */
1622 static void
1623 EndCopyTo(CopyState cstate)
1624 {
1625         if (cstate->queryDesc != NULL)
1626         {
1627                 /* Close down the query and free resources. */
1628                 ExecutorFinish(cstate->queryDesc);
1629                 ExecutorEnd(cstate->queryDesc);
1630                 FreeQueryDesc(cstate->queryDesc);
1631                 PopActiveSnapshot();
1632         }
1633
1634         /* Clean up storage */
1635         EndCopy(cstate);
1636 }
1637
1638 /*
1639  * Copy from relation or query TO file.
1640  */
1641 static uint64
1642 CopyTo(CopyState cstate)
1643 {
1644         TupleDesc       tupDesc;
1645         int                     num_phys_attrs;
1646         Form_pg_attribute *attr;
1647         ListCell   *cur;
1648         uint64          processed;
1649
1650         if (cstate->rel)
1651                 tupDesc = RelationGetDescr(cstate->rel);
1652         else
1653                 tupDesc = cstate->queryDesc->tupDesc;
1654         attr = tupDesc->attrs;
1655         num_phys_attrs = tupDesc->natts;
1656         cstate->null_print_client = cstate->null_print;         /* default */
1657
1658         /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1659         cstate->fe_msgbuf = makeStringInfo();
1660
1661         /* Get info about the columns we need to process. */
1662         cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1663         foreach(cur, cstate->attnumlist)
1664         {
1665                 int                     attnum = lfirst_int(cur);
1666                 Oid                     out_func_oid;
1667                 bool            isvarlena;
1668
1669                 if (cstate->binary)
1670                         getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
1671                                                                         &out_func_oid,
1672                                                                         &isvarlena);
1673                 else
1674                         getTypeOutputInfo(attr[attnum - 1]->atttypid,
1675                                                           &out_func_oid,
1676                                                           &isvarlena);
1677                 fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
1678         }
1679
1680         /*
1681          * Create a temporary memory context that we can reset once per row to
1682          * recover palloc'd memory.  This avoids any problems with leaks inside
1683          * datatype output routines, and should be faster than retail pfree's
1684          * anyway.      (We don't need a whole econtext as CopyFrom does.)
1685          */
1686         cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1687                                                                                            "COPY TO",
1688                                                                                            ALLOCSET_DEFAULT_MINSIZE,
1689                                                                                            ALLOCSET_DEFAULT_INITSIZE,
1690                                                                                            ALLOCSET_DEFAULT_MAXSIZE);
1691
1692         if (cstate->binary)
1693         {
1694                 /* Generate header for a binary copy */
1695                 int32           tmp;
1696
1697                 /* Signature */
1698                 CopySendData(cstate, BinarySignature, 11);
1699                 /* Flags field */
1700                 tmp = 0;
1701                 if (cstate->oids)
1702                         tmp |= (1 << 16);
1703                 CopySendInt32(cstate, tmp);
1704                 /* No header extension */
1705                 tmp = 0;
1706                 CopySendInt32(cstate, tmp);
1707         }
1708         else
1709         {
1710                 /*
1711                  * For non-binary copy, we need to convert null_print to file
1712                  * encoding, because it will be sent directly with CopySendString.
1713                  */
1714                 if (cstate->need_transcoding)
1715                         cstate->null_print_client = pg_server_to_any(cstate->null_print,
1716                                                                                                           cstate->null_print_len,
1717                                                                                                           cstate->file_encoding);
1718
1719                 /* if a header has been requested send the line */
1720                 if (cstate->header_line)
1721                 {
1722                         bool            hdr_delim = false;
1723
1724                         foreach(cur, cstate->attnumlist)
1725                         {
1726                                 int                     attnum = lfirst_int(cur);
1727                                 char       *colname;
1728
1729                                 if (hdr_delim)
1730                                         CopySendChar(cstate, cstate->delim[0]);
1731                                 hdr_delim = true;
1732
1733                                 colname = NameStr(attr[attnum - 1]->attname);
1734
1735                                 CopyAttributeOutCSV(cstate, colname, false,
1736                                                                         list_length(cstate->attnumlist) == 1);
1737                         }
1738
1739                         CopySendEndOfRow(cstate);
1740                 }
1741         }
1742
1743         if (cstate->rel)
1744         {
1745                 Datum      *values;
1746                 bool       *nulls;
1747                 HeapScanDesc scandesc;
1748                 HeapTuple       tuple;
1749
1750                 values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1751                 nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
1752
1753                 scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1754
1755                 processed = 0;
1756                 while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
1757                 {
1758                         CHECK_FOR_INTERRUPTS();
1759
1760                         /* Deconstruct the tuple ... faster than repeated heap_getattr */
1761                         heap_deform_tuple(tuple, tupDesc, values, nulls);
1762
1763                         /* Format and send the data */
1764                         CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
1765                         processed++;
1766                 }
1767
1768                 heap_endscan(scandesc);
1769
1770                 pfree(values);
1771                 pfree(nulls);
1772         }
1773         else
1774         {
1775                 /* run the plan --- the dest receiver will send tuples */
1776                 ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
1777                 processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1778         }
1779
1780         if (cstate->binary)
1781         {
1782                 /* Generate trailer for a binary copy */
1783                 CopySendInt16(cstate, -1);
1784                 /* Need to flush out the trailer */
1785                 CopySendEndOfRow(cstate);
1786         }
1787
1788         MemoryContextDelete(cstate->rowcontext);
1789
1790         return processed;
1791 }
1792
1793 /*
1794  * Emit one row during CopyTo().
1795  */
1796 static void
1797 CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
1798 {
1799         bool            need_delim = false;
1800         FmgrInfo   *out_functions = cstate->out_functions;
1801         MemoryContext oldcontext;
1802         ListCell   *cur;
1803         char       *string;
1804
1805         MemoryContextReset(cstate->rowcontext);
1806         oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1807
1808         if (cstate->binary)
1809         {
1810                 /* Binary per-tuple header */
1811                 CopySendInt16(cstate, list_length(cstate->attnumlist));
1812                 /* Send OID if wanted --- note attnumlist doesn't include it */
1813                 if (cstate->oids)
1814                 {
1815                         /* Hack --- assume Oid is same size as int32 */
1816                         CopySendInt32(cstate, sizeof(int32));
1817                         CopySendInt32(cstate, tupleOid);
1818                 }
1819         }
1820         else
1821         {
1822                 /* Text format has no per-tuple header, but send OID if wanted */
1823                 /* Assume digits don't need any quoting or encoding conversion */
1824                 if (cstate->oids)
1825                 {
1826                         string = DatumGetCString(DirectFunctionCall1(oidout,
1827                                                                                                 ObjectIdGetDatum(tupleOid)));
1828                         CopySendString(cstate, string);
1829                         need_delim = true;
1830                 }
1831         }
1832
1833         foreach(cur, cstate->attnumlist)
1834         {
1835                 int                     attnum = lfirst_int(cur);
1836                 Datum           value = values[attnum - 1];
1837                 bool            isnull = nulls[attnum - 1];
1838
1839                 if (!cstate->binary)
1840                 {
1841                         if (need_delim)
1842                                 CopySendChar(cstate, cstate->delim[0]);
1843                         need_delim = true;
1844                 }
1845
1846                 if (isnull)
1847                 {
1848                         if (!cstate->binary)
1849                                 CopySendString(cstate, cstate->null_print_client);
1850                         else
1851                                 CopySendInt32(cstate, -1);
1852                 }
1853                 else
1854                 {
1855                         if (!cstate->binary)
1856                         {
1857                                 string = OutputFunctionCall(&out_functions[attnum - 1],
1858                                                                                         value);
1859                                 if (cstate->csv_mode)
1860                                         CopyAttributeOutCSV(cstate, string,
1861                                                                                 cstate->force_quote_flags[attnum - 1],
1862                                                                                 list_length(cstate->attnumlist) == 1);
1863                                 else
1864                                         CopyAttributeOutText(cstate, string);
1865                         }
1866                         else
1867                         {
1868                                 bytea      *outputbytes;
1869
1870                                 outputbytes = SendFunctionCall(&out_functions[attnum - 1],
1871                                                                                            value);
1872                                 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
1873                                 CopySendData(cstate, VARDATA(outputbytes),
1874                                                          VARSIZE(outputbytes) - VARHDRSZ);
1875                         }
1876                 }
1877         }
1878
1879         CopySendEndOfRow(cstate);
1880
1881         MemoryContextSwitchTo(oldcontext);
1882 }
1883
1884
1885 /*
1886  * error context callback for COPY FROM
1887  *
1888  * The argument for the error context must be CopyState.
1889  */
1890 void
1891 CopyFromErrorCallback(void *arg)
1892 {
1893         CopyState       cstate = (CopyState) arg;
1894
1895         if (cstate->binary)
1896         {
1897                 /* can't usefully display the data */
1898                 if (cstate->cur_attname)
1899                         errcontext("COPY %s, line %d, column %s",
1900                                            cstate->cur_relname, cstate->cur_lineno,
1901                                            cstate->cur_attname);
1902                 else
1903                         errcontext("COPY %s, line %d",
1904                                            cstate->cur_relname, cstate->cur_lineno);
1905         }
1906         else
1907         {
1908                 if (cstate->cur_attname && cstate->cur_attval)
1909                 {
1910                         /* error is relevant to a particular column */
1911                         char       *attval;
1912
1913                         attval = limit_printout_length(cstate->cur_attval);
1914                         errcontext("COPY %s, line %d, column %s: \"%s\"",
1915                                            cstate->cur_relname, cstate->cur_lineno,
1916                                            cstate->cur_attname, attval);
1917                         pfree(attval);
1918                 }
1919                 else if (cstate->cur_attname)
1920                 {
1921                         /* error is relevant to a particular column, value is NULL */
1922                         errcontext("COPY %s, line %d, column %s: null input",
1923                                            cstate->cur_relname, cstate->cur_lineno,
1924                                            cstate->cur_attname);
1925                 }
1926                 else
1927                 {
1928                         /*
1929                          * Error is relevant to a particular line.
1930                          *
1931                          * If line_buf still contains the correct line, and it's already
1932                          * transcoded, print it. If it's still in a foreign encoding, it's
1933                          * quite likely that the error is precisely a failure to do
1934                          * encoding conversion (ie, bad data). We dare not try to convert
1935                          * it, and at present there's no way to regurgitate it without
1936                          * conversion. So we have to punt and just report the line number.
1937                          */
1938                         if (cstate->line_buf_valid &&
1939                                 (cstate->line_buf_converted || !cstate->need_transcoding))
1940                         {
1941                                 char       *lineval;
1942
1943                                 lineval = limit_printout_length(cstate->line_buf.data);
1944                                 errcontext("COPY %s, line %d: \"%s\"",
1945                                                    cstate->cur_relname, cstate->cur_lineno, lineval);
1946                                 pfree(lineval);
1947                         }
1948                         else
1949                         {
1950                                 errcontext("COPY %s, line %d",
1951                                                    cstate->cur_relname, cstate->cur_lineno);
1952                         }
1953                 }
1954         }
1955 }
1956
/*
 * Make sure we don't print an unreasonable amount of COPY data in a message.
 *
 * It would seem a lot easier to just use the sprintf "precision" limit to
 * truncate the string.  However, some versions of glibc have a bug/misfeature
 * that vsnprintf will always fail (return -1) if it is asked to truncate
 * a string that contains invalid byte sequences for the current encoding.
 * So, do our own truncation.
 *
 * The result is always a freshly palloc'd string: a plain copy when the
 * input is at most MAX_COPY_DATA_DISPLAY bytes long, otherwise a clipped
 * prefix followed by "...".
 */
static char *
limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

	int			slen = strlen(str);
	int			cliplen;
	char	   *clipped;

	if (slen > MAX_COPY_DATA_DISPLAY)
	{
		/* Apply encoding-dependent truncation */
		cliplen = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY);

		/* Room for the clipped text, the three dots and the terminator */
		clipped = (char *) palloc(cliplen + 4);
		memcpy(clipped, str, cliplen);
		memcpy(clipped + cliplen, "...", 4);

		return clipped;
	}

	/* Short enough to show in full */
	return pstrdup(str);
}
1991
1992 /*
1993  * Copy FROM file to relation.
1994  */
1995 static uint64
1996 CopyFrom(CopyState cstate)
1997 {
1998         HeapTuple       tuple;
1999         TupleDesc       tupDesc;
2000         Datum      *values;
2001         bool       *nulls;
2002         ResultRelInfo *resultRelInfo;
2003         EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
2004         ExprContext *econtext;
2005         TupleTableSlot *myslot;
2006         MemoryContext oldcontext = CurrentMemoryContext;
2007
2008         ErrorContextCallback errcallback;
2009         CommandId       mycid = GetCurrentCommandId(true);
2010         int                     hi_options = 0; /* start with default heap_insert options */
2011         BulkInsertState bistate;
2012         uint64          processed = 0;
2013         bool            useHeapMultiInsert;
2014         int                     nBufferedTuples = 0;
2015
2016 #define MAX_BUFFERED_TUPLES 1000
2017         HeapTuple  *bufferedTuples = NULL;      /* initialize to silence warning */
2018         Size            bufferedTuplesSize = 0;
2019         int                     firstBufferedLineNo = 0;
2020
2021         Assert(cstate->rel);
2022
2023         if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
2024         {
2025                 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
2026                         ereport(ERROR,
2027                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2028                                          errmsg("cannot copy to view \"%s\"",
2029                                                         RelationGetRelationName(cstate->rel))));
2030                 else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
2031                         ereport(ERROR,
2032                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2033                                          errmsg("cannot copy to materialized view \"%s\"",
2034                                                         RelationGetRelationName(cstate->rel))));
2035                 else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
2036                         ereport(ERROR,
2037                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2038                                          errmsg("cannot copy to foreign table \"%s\"",
2039                                                         RelationGetRelationName(cstate->rel))));
2040                 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
2041                         ereport(ERROR,
2042                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2043                                          errmsg("cannot copy to sequence \"%s\"",
2044                                                         RelationGetRelationName(cstate->rel))));
2045                 else
2046                         ereport(ERROR,
2047                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2048                                          errmsg("cannot copy to non-table relation \"%s\"",
2049                                                         RelationGetRelationName(cstate->rel))));
2050         }
2051
2052         tupDesc = RelationGetDescr(cstate->rel);
2053
2054         /*----------
2055          * Check to see if we can avoid writing WAL
2056          *
2057          * If archive logging/streaming is not enabled *and* either
2058          *      - table was created in same transaction as this COPY
2059          *      - data is being written to relfilenode created in this transaction
2060          * then we can skip writing WAL.  It's safe because if the transaction
2061          * doesn't commit, we'll discard the table (or the new relfilenode file).
2062          * If it does commit, we'll have done the heap_sync at the bottom of this
2063          * routine first.
2064          *
2065          * As mentioned in comments in utils/rel.h, the in-same-transaction test
2066          * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
2067          * can be cleared before the end of the transaction. The exact case is
2068          * when a relation sets a new relfilenode twice in same transaction, yet
2069          * the second one fails in an aborted subtransaction, e.g.
2070          *
2071          * BEGIN;
2072          * TRUNCATE t;
2073          * SAVEPOINT save;
2074          * TRUNCATE t;
2075          * ROLLBACK TO save;
2076          * COPY ...
2077          *
2078          * Also, if the target file is new-in-transaction, we assume that checking
2079          * FSM for free space is a waste of time, even if we must use WAL because
2080          * of archiving.  This could possibly be wrong, but it's unlikely.
2081          *
2082          * The comments for heap_insert and RelationGetBufferForTuple specify that
2083          * skipping WAL logging is only safe if we ensure that our tuples do not
2084          * go into pages containing tuples from any other transactions --- but this
2085          * must be the case if we have a new table or new relfilenode, so we need
2086          * no additional work to enforce that.
2087          *----------
2088          */
2089         /* createSubid is creation check, newRelfilenodeSubid is truncation check */
2090         if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
2091                 cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2092         {
2093                 hi_options |= HEAP_INSERT_SKIP_FSM;
2094                 if (!XLogIsNeeded())
2095                         hi_options |= HEAP_INSERT_SKIP_WAL;
2096         }
2097
2098         /*
2099          * Optimize if new relfilenode was created in this subxact or one of its
2100          * committed children and we won't see those rows later as part of an
2101          * earlier scan or command. This ensures that if this subtransaction
2102          * aborts then the frozen rows won't be visible after xact cleanup. Note
2103          * that the stronger test of exactly which subtransaction created it is
2104          * crucial for correctness of this optimisation.
2105          */
2106         if (cstate->freeze)
2107         {
2108                 if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
2109                         ereport(ERROR,
2110                                         (ERRCODE_INVALID_TRANSACTION_STATE,
2111                                          errmsg("cannot perform FREEZE because of prior transaction activity")));
2112
2113                 if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
2114                  cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
2115                         ereport(ERROR,
2116                                         (ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE,
2117                                          errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
2118
2119                 hi_options |= HEAP_INSERT_FROZEN;
2120         }
2121
2122         /*
2123          * We need a ResultRelInfo so we can use the regular executor's
2124          * index-entry-making machinery.  (There used to be a huge amount of code
2125          * here that basically duplicated execUtils.c ...)
2126          */
2127         resultRelInfo = makeNode(ResultRelInfo);
2128         InitResultRelInfo(resultRelInfo,
2129                                           cstate->rel,
2130                                           1,            /* dummy rangetable index */
2131                                           0);
2132
2133         ExecOpenIndices(resultRelInfo);
2134
2135         estate->es_result_relations = resultRelInfo;
2136         estate->es_num_result_relations = 1;
2137         estate->es_result_relation_info = resultRelInfo;
2138
2139         /* Set up a tuple slot too */
2140         myslot = ExecInitExtraTupleSlot(estate);
2141         ExecSetSlotDescriptor(myslot, tupDesc);
2142         /* Triggers might need a slot as well */
2143         estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
2144
2145         /*
2146          * It's more efficient to prepare a bunch of tuples for insertion, and
2147          * insert them in one heap_multi_insert() call, than call heap_insert()
2148          * separately for every tuple. However, we can't do that if there are
2149          * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
2150          * expressions. Such triggers or expressions might query the table we're
2151          * inserting to, and act differently if the tuples that have already been
2152          * processed and prepared for insertion are not there.
2153          */
2154         if ((resultRelInfo->ri_TrigDesc != NULL &&
2155                  (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2156                   resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
2157                 cstate->volatile_defexprs)
2158         {
2159                 useHeapMultiInsert = false;
2160         }
2161         else
2162         {
2163                 useHeapMultiInsert = true;
2164                 bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
2165         }
2166
2167         /* Prepare to catch AFTER triggers. */
2168         AfterTriggerBeginQuery();
2169
2170         /*
2171          * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
2172          * should do this for COPY, since it's not really an "INSERT" statement as
2173          * such. However, executing these triggers maintains consistency with the
2174          * EACH ROW triggers that we already fire on COPY.
2175          */
2176         ExecBSInsertTriggers(estate, resultRelInfo);
2177
2178         values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
2179         nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
2180
2181         bistate = GetBulkInsertState();
2182         econtext = GetPerTupleExprContext(estate);
2183
2184         /* Set up callback to identify error line number */
2185         errcallback.callback = CopyFromErrorCallback;
2186         errcallback.arg = (void *) cstate;
2187         errcallback.previous = error_context_stack;
2188         error_context_stack = &errcallback;
2189
2190         for (;;)
2191         {
2192                 TupleTableSlot *slot;
2193                 bool            skip_tuple;
2194                 Oid                     loaded_oid = InvalidOid;
2195
2196                 CHECK_FOR_INTERRUPTS();
2197
2198                 if (nBufferedTuples == 0)
2199                 {
2200                         /*
2201                          * Reset the per-tuple exprcontext. We can only do this if the
2202                          * tuple buffer is empty (calling the context the per-tuple memory
2203                          * context is a bit of a misnomer now
2204                          */
2205                         ResetPerTupleExprContext(estate);
2206                 }
2207
2208                 /* Switch into its memory context */
2209                 MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2210
2211                 if (!NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid))
2212                         break;
2213
2214                 /* And now we can form the input tuple. */
2215                 tuple = heap_form_tuple(tupDesc, values, nulls);
2216
2217                 if (loaded_oid != InvalidOid)
2218                         HeapTupleSetOid(tuple, loaded_oid);
2219
2220                 /* Triggers and stuff need to be invoked in query context. */
2221                 MemoryContextSwitchTo(oldcontext);
2222
2223                 /* Place tuple in tuple slot --- but slot shouldn't free it */
2224                 slot = myslot;
2225                 ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2226
2227                 skip_tuple = false;
2228
2229                 /* BEFORE ROW INSERT Triggers */
2230                 if (resultRelInfo->ri_TrigDesc &&
2231                         resultRelInfo->ri_TrigDesc->trig_insert_before_row)
2232                 {
2233                         slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
2234
2235                         if (slot == NULL)       /* "do nothing" */
2236                                 skip_tuple = true;
2237                         else    /* trigger might have changed tuple */
2238                                 tuple = ExecMaterializeSlot(slot);
2239                 }
2240
2241                 if (!skip_tuple)
2242                 {
2243                         /* Check the constraints of the tuple */
2244                         if (cstate->rel->rd_att->constr)
2245                                 ExecConstraints(resultRelInfo, slot, estate);
2246
2247                         if (useHeapMultiInsert)
2248                         {
2249                                 /* Add this tuple to the tuple buffer */
2250                                 if (nBufferedTuples == 0)
2251                                         firstBufferedLineNo = cstate->cur_lineno;
2252                                 bufferedTuples[nBufferedTuples++] = tuple;
2253                                 bufferedTuplesSize += tuple->t_len;
2254
2255                                 /*
2256                                  * If the buffer filled up, flush it. Also flush if the total
2257                                  * size of all the tuples in the buffer becomes large, to
2258                                  * avoid using large amounts of memory for the buffers when
2259                                  * the tuples are exceptionally wide.
2260                                  */
2261                                 if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
2262                                         bufferedTuplesSize > 65535)
2263                                 {
2264                                         CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2265                                                                                 resultRelInfo, myslot, bistate,
2266                                                                                 nBufferedTuples, bufferedTuples,
2267                                                                                 firstBufferedLineNo);
2268                                         nBufferedTuples = 0;
2269                                         bufferedTuplesSize = 0;
2270                                 }
2271                         }
2272                         else
2273                         {
2274                                 List       *recheckIndexes = NIL;
2275
2276                                 /* OK, store the tuple and create index entries for it */
2277                                 heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
2278
2279                                 if (resultRelInfo->ri_NumIndices > 0)
2280                                         recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
2281                                                                                                                    estate);
2282
2283                                 /* AFTER ROW INSERT Triggers */
2284                                 ExecARInsertTriggers(estate, resultRelInfo, tuple,
2285                                                                          recheckIndexes);
2286
2287                                 list_free(recheckIndexes);
2288                         }
2289
2290                         /*
2291                          * We count only tuples not suppressed by a BEFORE INSERT trigger;
2292                          * this is the same definition used by execMain.c for counting
2293                          * tuples inserted by an INSERT command.
2294                          */
2295                         processed++;
2296                 }
2297         }
2298
2299         /* Flush any remaining buffered tuples */
2300         if (nBufferedTuples > 0)
2301                 CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2302                                                         resultRelInfo, myslot, bistate,
2303                                                         nBufferedTuples, bufferedTuples,
2304                                                         firstBufferedLineNo);
2305
2306         /* Done, clean up */
2307         error_context_stack = errcallback.previous;
2308
2309         FreeBulkInsertState(bistate);
2310
2311         MemoryContextSwitchTo(oldcontext);
2312
2313         /* Execute AFTER STATEMENT insertion triggers */
2314         ExecASInsertTriggers(estate, resultRelInfo);
2315
2316         /* Handle queued AFTER triggers */
2317         AfterTriggerEndQuery(estate);
2318
2319         pfree(values);
2320         pfree(nulls);
2321
2322         ExecResetTupleTable(estate->es_tupleTable, false);
2323
2324         ExecCloseIndices(resultRelInfo);
2325
2326         FreeExecutorState(estate);
2327
2328         /*
2329          * If we skipped writing WAL, then we need to sync the heap (but not
2330          * indexes since those use WAL anyway)
2331          */
2332         if (hi_options & HEAP_INSERT_SKIP_WAL)
2333                 heap_sync(cstate->rel);
2334
2335         return processed;
2336 }
2337
2338 /*
2339  * A subroutine of CopyFrom, to write the current batch of buffered heap
2340  * tuples to the heap. Also updates indexes and runs AFTER ROW INSERT
2341  * triggers.
2342  */
2343 static void
2344 CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
2345                                         int hi_options, ResultRelInfo *resultRelInfo,
2346                                         TupleTableSlot *myslot, BulkInsertState bistate,
2347                                         int nBufferedTuples, HeapTuple *bufferedTuples,
2348                                         int firstBufferedLineNo)
2349 {
2350         MemoryContext oldcontext;
2351         int                     i;
2352         int                     save_cur_lineno;
2353
2354         /*
2355          * Print error context information correctly, if one of the operations
2356          * below fail.
2357          */
2358         cstate->line_buf_valid = false;
2359         save_cur_lineno = cstate->cur_lineno;
2360
2361         /*
2362          * heap_multi_insert leaks memory, so switch to short-lived memory context
2363          * before calling it.
2364          */
2365         oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2366         heap_multi_insert(cstate->rel,
2367                                           bufferedTuples,
2368                                           nBufferedTuples,
2369                                           mycid,
2370                                           hi_options,
2371                                           bistate);
2372         MemoryContextSwitchTo(oldcontext);
2373
2374         /*
2375          * If there are any indexes, update them for all the inserted tuples, and
2376          * run AFTER ROW INSERT triggers.
2377          */
2378         if (resultRelInfo->ri_NumIndices > 0)
2379         {
2380                 for (i = 0; i < nBufferedTuples; i++)
2381                 {
2382                         List       *recheckIndexes;
2383
2384                         cstate->cur_lineno = firstBufferedLineNo + i;
2385                         ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
2386                         recheckIndexes =
2387                                 ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
2388                                                                           estate);
2389                         ExecARInsertTriggers(estate, resultRelInfo,
2390                                                                  bufferedTuples[i],
2391                                                                  recheckIndexes);
2392                         list_free(recheckIndexes);
2393                 }
2394         }
2395
2396         /*
2397          * There's no indexes, but see if we need to run AFTER ROW INSERT triggers
2398          * anyway.
2399          */
2400         else if (resultRelInfo->ri_TrigDesc != NULL &&
2401                          resultRelInfo->ri_TrigDesc->trig_insert_after_row)
2402         {
2403                 for (i = 0; i < nBufferedTuples; i++)
2404                 {
2405                         cstate->cur_lineno = firstBufferedLineNo + i;
2406                         ExecARInsertTriggers(estate, resultRelInfo,
2407                                                                  bufferedTuples[i],
2408                                                                  NIL);
2409                 }
2410         }
2411
2412         /* reset cur_lineno to where we were */
2413         cstate->cur_lineno = save_cur_lineno;
2414 }
2415
2416 /*
2417  * Setup to read tuples from a file for COPY FROM.
2418  *
2419  * 'rel': Used as a template for the tuples
2420  * 'filename': Name of server-local file to read
2421  * 'attnamelist': List of char *, columns to include. NIL selects all cols.
2422  * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
2423  *
2424  * Returns a CopyState, to be passed to NextCopyFrom and related functions.
2425  */
2426 CopyState
2427 BeginCopyFrom(Relation rel,
2428                           const char *filename,
2429                           bool is_program,
2430                           List *attnamelist,
2431                           List *options)
2432 {
2433         CopyState       cstate;
2434         bool            pipe = (filename == NULL);
2435         TupleDesc       tupDesc;
2436         Form_pg_attribute *attr;
2437         AttrNumber      num_phys_attrs,
2438                                 num_defaults;
2439         FmgrInfo   *in_functions;
2440         Oid                *typioparams;
2441         int                     attnum;
2442         Oid                     in_func_oid;
2443         int                *defmap;
2444         ExprState **defexprs;
2445         MemoryContext oldcontext;
2446         bool            volatile_defexprs;
2447
2448         cstate = BeginCopy(true, rel, NULL, NULL, attnamelist, options);
2449         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
2450
2451         /* Initialize state variables */
2452         cstate->fe_eof = false;
2453         cstate->eol_type = EOL_UNKNOWN;
2454         cstate->cur_relname = RelationGetRelationName(cstate->rel);
2455         cstate->cur_lineno = 0;
2456         cstate->cur_attname = NULL;
2457         cstate->cur_attval = NULL;
2458
2459         /* Set up variables to avoid per-attribute overhead. */
2460         initStringInfo(&cstate->attribute_buf);
2461         initStringInfo(&cstate->line_buf);
2462         cstate->line_buf_converted = false;
2463         cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
2464         cstate->raw_buf_index = cstate->raw_buf_len = 0;
2465
2466         tupDesc = RelationGetDescr(cstate->rel);
2467         attr = tupDesc->attrs;
2468         num_phys_attrs = tupDesc->natts;
2469         num_defaults = 0;
2470         volatile_defexprs = false;
2471
2472         /*
2473          * Pick up the required catalog information for each attribute in the
2474          * relation, including the input function, the element type (to pass to
2475          * the input function), and info about defaults and constraints. (Which
2476          * input function we use depends on text/binary format choice.)
2477          */
2478         in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
2479         typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
2480         defmap = (int *) palloc(num_phys_attrs * sizeof(int));
2481         defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
2482
2483         for (attnum = 1; attnum <= num_phys_attrs; attnum++)
2484         {
2485                 /* We don't need info for dropped attributes */
2486                 if (attr[attnum - 1]->attisdropped)
2487                         continue;
2488
2489                 /* Fetch the input function and typioparam info */
2490                 if (cstate->binary)
2491                         getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
2492                                                                    &in_func_oid, &typioparams[attnum - 1]);
2493                 else
2494                         getTypeInputInfo(attr[attnum - 1]->atttypid,
2495                                                          &in_func_oid, &typioparams[attnum - 1]);
2496                 fmgr_info(in_func_oid, &in_functions[attnum - 1]);
2497
2498                 /* Get default info if needed */
2499                 if (!list_member_int(cstate->attnumlist, attnum))
2500                 {
2501                         /* attribute is NOT to be copied from input */
2502                         /* use default value if one exists */
2503                         Node       *defexpr = build_column_default(cstate->rel, attnum);
2504
2505                         if (defexpr != NULL)
2506                         {
2507                                 /* Initialize expressions in copycontext. */
2508                                 defexprs[num_defaults] = ExecInitExpr(
2509                                                                  expression_planner((Expr *) defexpr), NULL);
2510                                 defmap[num_defaults] = attnum - 1;
2511                                 num_defaults++;
2512
2513                                 if (!volatile_defexprs)
2514                                         volatile_defexprs = contain_volatile_functions(defexpr);
2515                         }
2516                 }
2517         }
2518
2519         /* We keep those variables in cstate. */
2520         cstate->in_functions = in_functions;
2521         cstate->typioparams = typioparams;
2522         cstate->defmap = defmap;
2523         cstate->defexprs = defexprs;
2524         cstate->volatile_defexprs = volatile_defexprs;
2525         cstate->num_defaults = num_defaults;
2526         cstate->is_program = is_program;
2527
2528         if (pipe)
2529         {
2530                 Assert(!is_program);    /* the grammar does not allow this */
2531                 if (whereToSendOutput == DestRemote)
2532                         ReceiveCopyBegin(cstate);
2533                 else
2534                         cstate->copy_file = stdin;
2535         }
2536         else
2537         {
2538                 cstate->filename = pstrdup(filename);
2539
2540                 if (cstate->is_program)
2541                 {
2542                         cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
2543                         if (cstate->copy_file == NULL)
2544                                 ereport(ERROR,
2545                                                 (errmsg("could not execute command \"%s\": %m",
2546                                                                 cstate->filename)));
2547                 }
2548                 else
2549                 {
2550                         struct stat st;
2551
2552                         cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
2553                         if (cstate->copy_file == NULL)
2554                                 ereport(ERROR,
2555                                                 (errcode_for_file_access(),
2556                                                  errmsg("could not open file \"%s\" for reading: %m",
2557                                                                 cstate->filename)));
2558
2559                         fstat(fileno(cstate->copy_file), &st);
2560                         if (S_ISDIR(st.st_mode))
2561                                 ereport(ERROR,
2562                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2563                                                  errmsg("\"%s\" is a directory", cstate->filename)));
2564                 }
2565         }
2566
2567         if (!cstate->binary)
2568         {
2569                 /* must rely on user to tell us... */
2570                 cstate->file_has_oids = cstate->oids;
2571         }
2572         else
2573         {
2574                 /* Read and verify binary header */
2575                 char            readSig[11];
2576                 int32           tmp;
2577
2578                 /* Signature */
2579                 if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
2580                         memcmp(readSig, BinarySignature, 11) != 0)
2581                         ereport(ERROR,
2582                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2583                                          errmsg("COPY file signature not recognized")));
2584                 /* Flags field */
2585                 if (!CopyGetInt32(cstate, &tmp))
2586                         ereport(ERROR,
2587                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2588                                          errmsg("invalid COPY file header (missing flags)")));
2589                 cstate->file_has_oids = (tmp & (1 << 16)) != 0;
2590                 tmp &= ~(1 << 16);
2591                 if ((tmp >> 16) != 0)
2592                         ereport(ERROR,
2593                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2594                                  errmsg("unrecognized critical flags in COPY file header")));
2595                 /* Header extension length */
2596                 if (!CopyGetInt32(cstate, &tmp) ||
2597                         tmp < 0)
2598                         ereport(ERROR,
2599                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2600                                          errmsg("invalid COPY file header (missing length)")));
2601                 /* Skip extension header, if present */
2602                 while (tmp-- > 0)
2603                 {
2604                         if (CopyGetData(cstate, readSig, 1, 1) != 1)
2605                                 ereport(ERROR,
2606                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2607                                                  errmsg("invalid COPY file header (wrong length)")));
2608                 }
2609         }
2610
2611         if (cstate->file_has_oids && cstate->binary)
2612         {
2613                 getTypeBinaryInputInfo(OIDOID,
2614                                                            &in_func_oid, &cstate->oid_typioparam);
2615                 fmgr_info(in_func_oid, &cstate->oid_in_function);
2616         }
2617
2618         /* create workspace for CopyReadAttributes results */
2619         if (!cstate->binary)
2620         {
2621                 AttrNumber      attr_count = list_length(cstate->attnumlist);
2622                 int                     nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
2623
2624                 cstate->max_fields = nfields;
2625                 cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
2626         }
2627
2628         MemoryContextSwitchTo(oldcontext);
2629
2630         return cstate;
2631 }
2632
2633 /*
2634  * Read raw fields in the next line for COPY FROM in text or csv mode.
2635  * Return false if no more lines.
2636  *
2637  * An internal temporary buffer is returned via 'fields'. It is valid until
2638  * the next call of the function. Since the function returns all raw fields
2639  * in the input file, 'nfields' could be different from the number of columns
2640  * in the relation.
2641  *
2642  * NOTE: force_not_null option are not applied to the returned fields.
2643  */
2644 bool
2645 NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
2646 {
2647         int                     fldct;
2648         bool            done;
2649
2650         /* only available for text or csv input */
2651         Assert(!cstate->binary);
2652
2653         /* on input just throw the header line away */
2654         if (cstate->cur_lineno == 0 && cstate->header_line)
2655         {
2656                 cstate->cur_lineno++;
2657                 if (CopyReadLine(cstate))
2658                         return false;           /* done */
2659         }
2660
2661         cstate->cur_lineno++;
2662
2663         /* Actually read the line into memory here */
2664         done = CopyReadLine(cstate);
2665
2666         /*
2667          * EOF at start of line means we're done.  If we see EOF after some
2668          * characters, we act as though it was newline followed by EOF, ie,
2669          * process the line and then exit loop on next iteration.
2670          */
2671         if (done && cstate->line_buf.len == 0)
2672                 return false;
2673
2674         /* Parse the line into de-escaped field values */
2675         if (cstate->csv_mode)
2676                 fldct = CopyReadAttributesCSV(cstate);
2677         else
2678                 fldct = CopyReadAttributesText(cstate);
2679
2680         *fields = cstate->raw_fields;
2681         *nfields = fldct;
2682         return true;
2683 }
2684
2685 /*
2686  * Read next tuple from file for COPY FROM. Return false if no more tuples.
2687  *
2688  * 'econtext' is used to evaluate default expression for each columns not
2689  * read from the file. It can be NULL when no default values are used, i.e.
2690  * when all columns are read from the file.
2691  *
2692  * 'values' and 'nulls' arrays must be the same length as columns of the
2693  * relation passed to BeginCopyFrom. This function fills the arrays.
2694  * Oid of the tuple is returned with 'tupleOid' separately.
2695  */
2696 bool
2697 NextCopyFrom(CopyState cstate, ExprContext *econtext,
2698                          Datum *values, bool *nulls, Oid *tupleOid)
2699 {
2700         TupleDesc       tupDesc;
2701         Form_pg_attribute *attr;
2702         AttrNumber      num_phys_attrs,
2703                                 attr_count,
2704                                 num_defaults = cstate->num_defaults;
2705         FmgrInfo   *in_functions = cstate->in_functions;
2706         Oid                *typioparams = cstate->typioparams;
2707         int                     i;
2708         int                     nfields;
2709         bool            isnull;
2710         bool            file_has_oids = cstate->file_has_oids;
2711         int                *defmap = cstate->defmap;
2712         ExprState **defexprs = cstate->defexprs;
2713
2714         tupDesc = RelationGetDescr(cstate->rel);
2715         attr = tupDesc->attrs;
2716         num_phys_attrs = tupDesc->natts;
2717         attr_count = list_length(cstate->attnumlist);
2718         nfields = file_has_oids ? (attr_count + 1) : attr_count;
2719
2720         /* Initialize all values for row to NULL */
2721         MemSet(values, 0, num_phys_attrs * sizeof(Datum));
2722         MemSet(nulls, true, num_phys_attrs * sizeof(bool));
2723
2724         if (!cstate->binary)
2725         {
2726                 char      **field_strings;
2727                 ListCell   *cur;
2728                 int                     fldct;
2729                 int                     fieldno;
2730                 char       *string;
2731
2732                 /* read raw fields in the next line */
2733                 if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
2734                         return false;
2735
2736                 /* check for overflowing fields */
2737                 if (nfields > 0 && fldct > nfields)
2738                         ereport(ERROR,
2739                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2740                                          errmsg("extra data after last expected column")));
2741
2742                 fieldno = 0;
2743
2744                 /* Read the OID field if present */
2745                 if (file_has_oids)
2746                 {
2747                         if (fieldno >= fldct)
2748                                 ereport(ERROR,
2749                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2750                                                  errmsg("missing data for OID column")));
2751                         string = field_strings[fieldno++];
2752
2753                         if (string == NULL)
2754                                 ereport(ERROR,
2755                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2756                                                  errmsg("null OID in COPY data")));
2757                         else if (cstate->oids && tupleOid != NULL)
2758                         {
2759                                 cstate->cur_attname = "oid";
2760                                 cstate->cur_attval = string;
2761                                 *tupleOid = DatumGetObjectId(DirectFunctionCall1(oidin,
2762                                                                                                    CStringGetDatum(string)));
2763                                 if (*tupleOid == InvalidOid)
2764                                         ereport(ERROR,
2765                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2766                                                          errmsg("invalid OID in COPY data")));
2767                                 cstate->cur_attname = NULL;
2768                                 cstate->cur_attval = NULL;
2769                         }
2770                 }
2771
2772                 /* Loop to read the user attributes on the line. */
2773                 foreach(cur, cstate->attnumlist)
2774                 {
2775                         int                     attnum = lfirst_int(cur);
2776                         int                     m = attnum - 1;
2777
2778                         if (fieldno >= fldct)
2779                                 ereport(ERROR,
2780                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2781                                                  errmsg("missing data for column \"%s\"",
2782                                                                 NameStr(attr[m]->attname))));
2783                         string = field_strings[fieldno++];
2784
2785                         if (cstate->convert_select_flags &&
2786                                 !cstate->convert_select_flags[m])
2787                         {
2788                                 /* ignore input field, leaving column as NULL */
2789                                 continue;
2790                         }
2791
2792                         if (cstate->csv_mode && string == NULL &&
2793                                 cstate->force_notnull_flags[m])
2794                         {
2795                                 /* Go ahead and read the NULL string */
2796                                 string = cstate->null_print;
2797                         }
2798
2799                         cstate->cur_attname = NameStr(attr[m]->attname);
2800                         cstate->cur_attval = string;
2801                         values[m] = InputFunctionCall(&in_functions[m],
2802                                                                                   string,
2803                                                                                   typioparams[m],
2804                                                                                   attr[m]->atttypmod);
2805                         if (string != NULL)
2806                                 nulls[m] = false;
2807                         cstate->cur_attname = NULL;
2808                         cstate->cur_attval = NULL;
2809                 }
2810
2811                 Assert(fieldno == nfields);
2812         }
2813         else
2814         {
2815                 /* binary */
2816                 int16           fld_count;
2817                 ListCell   *cur;
2818
2819                 cstate->cur_lineno++;
2820
2821                 if (!CopyGetInt16(cstate, &fld_count))
2822                 {
2823                         /* EOF detected (end of file, or protocol-level EOF) */
2824                         return false;
2825                 }
2826
2827                 if (fld_count == -1)
2828                 {
2829                         /*
2830                          * Received EOF marker.  In a V3-protocol copy, wait for the
2831                          * protocol-level EOF, and complain if it doesn't come
2832                          * immediately.  This ensures that we correctly handle CopyFail,
2833                          * if client chooses to send that now.
2834                          *
2835                          * Note that we MUST NOT try to read more data in an old-protocol
2836                          * copy, since there is no protocol-level EOF marker then.      We
2837                          * could go either way for copy from file, but choose to throw
2838                          * error if there's data after the EOF marker, for consistency
2839                          * with the new-protocol case.
2840                          */
2841                         char            dummy;
2842
2843                         if (cstate->copy_dest != COPY_OLD_FE &&
2844                                 CopyGetData(cstate, &dummy, 1, 1) > 0)
2845                                 ereport(ERROR,
2846                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2847                                                  errmsg("received copy data after EOF marker")));
2848                         return false;
2849                 }
2850
2851                 if (fld_count != attr_count)
2852                         ereport(ERROR,
2853                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2854                                          errmsg("row field count is %d, expected %d",
2855                                                         (int) fld_count, attr_count)));
2856
2857                 if (file_has_oids)
2858                 {
2859                         Oid                     loaded_oid;
2860
2861                         cstate->cur_attname = "oid";
2862                         loaded_oid =
2863                                 DatumGetObjectId(CopyReadBinaryAttribute(cstate,
2864                                                                                                                  0,
2865                                                                                                         &cstate->oid_in_function,
2866                                                                                                           cstate->oid_typioparam,
2867                                                                                                                  -1,
2868                                                                                                                  &isnull));
2869                         if (isnull || loaded_oid == InvalidOid)
2870                                 ereport(ERROR,
2871                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2872                                                  errmsg("invalid OID in COPY data")));
2873                         cstate->cur_attname = NULL;
2874                         if (cstate->oids && tupleOid != NULL)
2875                                 *tupleOid = loaded_oid;
2876                 }
2877
2878                 i = 0;
2879                 foreach(cur, cstate->attnumlist)
2880                 {
2881                         int                     attnum = lfirst_int(cur);
2882                         int                     m = attnum - 1;
2883
2884                         cstate->cur_attname = NameStr(attr[m]->attname);
2885                         i++;
2886                         values[m] = CopyReadBinaryAttribute(cstate,
2887                                                                                                 i,
2888                                                                                                 &in_functions[m],
2889                                                                                                 typioparams[m],
2890                                                                                                 attr[m]->atttypmod,
2891                                                                                                 &nulls[m]);
2892                         cstate->cur_attname = NULL;
2893                 }
2894         }
2895
2896         /*
2897          * Now compute and insert any defaults available for the columns not
2898          * provided by the input data.  Anything not processed here or above will
2899          * remain NULL.
2900          */
2901         for (i = 0; i < num_defaults; i++)
2902         {
2903                 /*
2904                  * The caller must supply econtext and have switched into the
2905                  * per-tuple memory context in it.
2906                  */
2907                 Assert(econtext != NULL);
2908                 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
2909
2910                 values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
2911                                                                                  &nulls[defmap[i]], NULL);
2912         }
2913
2914         return true;
2915 }
2916
2917 /*
2918  * Clean up storage and release resources for COPY FROM.
2919  */
2920 void
2921 EndCopyFrom(CopyState cstate)
2922 {
2923         /* No COPY FROM related resources except memory. */
2924
2925         EndCopy(cstate);
2926 }
2927
2928 /*
2929  * Read the next input line and stash it in line_buf, with conversion to
2930  * server encoding.
2931  *
2932  * Result is true if read was terminated by EOF, false if terminated
2933  * by newline.  The terminating newline or EOF marker is not included
2934  * in the final value of line_buf.
2935  */
2936 static bool
2937 CopyReadLine(CopyState cstate)
2938 {
2939         bool            result;
2940
2941         resetStringInfo(&cstate->line_buf);
2942         cstate->line_buf_valid = true;
2943
2944         /* Mark that encoding conversion hasn't occurred yet */
2945         cstate->line_buf_converted = false;
2946
2947         /* Parse data and transfer into line_buf */
2948         result = CopyReadLineText(cstate);
2949
2950         if (result)
2951         {
2952                 /*
2953                  * Reached EOF.  In protocol version 3, we should ignore anything
2954                  * after \. up to the protocol end of copy data.  (XXX maybe better
2955                  * not to treat \. as special?)
2956                  */
2957                 if (cstate->copy_dest == COPY_NEW_FE)
2958                 {
2959                         do
2960                         {
2961                                 cstate->raw_buf_index = cstate->raw_buf_len;
2962                         } while (CopyLoadRawBuf(cstate));
2963                 }
2964         }
2965         else
2966         {
2967                 /*
2968                  * If we didn't hit EOF, then we must have transferred the EOL marker
2969                  * to line_buf along with the data.  Get rid of it.
2970                  */
2971                 switch (cstate->eol_type)
2972                 {
2973                         case EOL_NL:
2974                                 Assert(cstate->line_buf.len >= 1);
2975                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2976                                 cstate->line_buf.len--;
2977                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2978                                 break;
2979                         case EOL_CR:
2980                                 Assert(cstate->line_buf.len >= 1);
2981                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
2982                                 cstate->line_buf.len--;
2983                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2984                                 break;
2985                         case EOL_CRNL:
2986                                 Assert(cstate->line_buf.len >= 2);
2987                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
2988                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2989                                 cstate->line_buf.len -= 2;
2990                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2991                                 break;
2992                         case EOL_UNKNOWN:
2993                                 /* shouldn't get here */
2994                                 Assert(false);
2995                                 break;
2996                 }
2997         }
2998
2999         /* Done reading the line.  Convert it to server encoding. */
3000         if (cstate->need_transcoding)
3001         {
3002                 char       *cvt;
3003
3004                 cvt = pg_any_to_server(cstate->line_buf.data,
3005                                                            cstate->line_buf.len,
3006                                                            cstate->file_encoding);
3007                 if (cvt != cstate->line_buf.data)
3008                 {
3009                         /* transfer converted data back to line_buf */
3010                         resetStringInfo(&cstate->line_buf);
3011                         appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
3012                         pfree(cvt);
3013                 }
3014         }
3015
3016         /* Now it's safe to use the buffer in error messages */
3017         cstate->line_buf_converted = true;
3018
3019         return result;
3020 }
3021
3022 /*
3023  * CopyReadLineText - inner loop of CopyReadLine for text mode
3024  */
3025 static bool
3026 CopyReadLineText(CopyState cstate)
3027 {
3028         char       *copy_raw_buf;
3029         int                     raw_buf_ptr;
3030         int                     copy_buf_len;
3031         bool            need_data = false;
3032         bool            hit_eof = false;
3033         bool            result = false;
3034         char            mblen_str[2];
3035
3036         /* CSV variables */
3037         bool            first_char_in_line = true;
3038         bool            in_quote = false,
3039                                 last_was_esc = false;
3040         char            quotec = '\0';
3041         char            escapec = '\0';
3042
3043         if (cstate->csv_mode)
3044         {
3045                 quotec = cstate->quote[0];
3046                 escapec = cstate->escape[0];
3047                 /* ignore special escape processing if it's the same as quotec */
3048                 if (quotec == escapec)
3049                         escapec = '\0';
3050         }
3051
3052         mblen_str[1] = '\0';
3053
3054         /*
3055          * The objective of this loop is to transfer the entire next input line
3056          * into line_buf.  Hence, we only care for detecting newlines (\r and/or
3057          * \n) and the end-of-copy marker (\.).
3058          *
3059          * In CSV mode, \r and \n inside a quoted field are just part of the data
3060          * value and are put in line_buf.  We keep just enough state to know if we
3061          * are currently in a quoted field or not.
3062          *
3063          * These four characters, and the CSV escape and quote characters, are
3064          * assumed the same in frontend and backend encodings.
3065          *
3066          * For speed, we try to move data from raw_buf to line_buf in chunks
3067          * rather than one character at a time.  raw_buf_ptr points to the next
3068          * character to examine; any characters from raw_buf_index to raw_buf_ptr
3069          * have been determined to be part of the line, but not yet transferred to
3070          * line_buf.
3071          *
3072          * For a little extra speed within the loop, we copy raw_buf and
3073          * raw_buf_len into local variables.
3074          */
3075         copy_raw_buf = cstate->raw_buf;
3076         raw_buf_ptr = cstate->raw_buf_index;
3077         copy_buf_len = cstate->raw_buf_len;
3078
3079         for (;;)
3080         {
3081                 int                     prev_raw_ptr;
3082                 char            c;
3083
3084                 /*
3085                  * Load more data if needed.  Ideally we would just force four bytes
3086                  * of read-ahead and avoid the many calls to
3087                  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
3088                  * does not allow us to read too far ahead or we might read into the
3089                  * next data, so we read-ahead only as far we know we can.      One
3090                  * optimization would be to read-ahead four byte here if
3091                  * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
3092                  * considering the size of the buffer.
3093                  */
3094                 if (raw_buf_ptr >= copy_buf_len || need_data)
3095                 {
3096                         REFILL_LINEBUF;
3097
3098                         /*
3099                          * Try to read some more data.  This will certainly reset
3100                          * raw_buf_index to zero, and raw_buf_ptr must go with it.
3101                          */
3102                         if (!CopyLoadRawBuf(cstate))
3103                                 hit_eof = true;
3104                         raw_buf_ptr = 0;
3105                         copy_buf_len = cstate->raw_buf_len;
3106
3107                         /*
3108                          * If we are completely out of data, break out of the loop,
3109                          * reporting EOF.
3110                          */
3111                         if (copy_buf_len <= 0)
3112                         {
3113                                 result = true;
3114                                 break;
3115                         }
3116                         need_data = false;
3117                 }
3118
3119                 /* OK to fetch a character */
3120                 prev_raw_ptr = raw_buf_ptr;
3121                 c = copy_raw_buf[raw_buf_ptr++];
3122
3123                 if (cstate->csv_mode)
3124                 {
3125                         /*
3126                          * If character is '\\' or '\r', we may need to look ahead below.
3127                          * Force fetch of the next character if we don't already have it.
3128                          * We need to do this before changing CSV state, in case one of
3129                          * these characters is also the quote or escape character.
3130                          *
3131                          * Note: old-protocol does not like forced prefetch, but it's OK
3132                          * here since we cannot validly be at EOF.
3133                          */
3134                         if (c == '\\' || c == '\r')
3135                         {
3136                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3137                         }
3138
3139                         /*
3140                          * Dealing with quotes and escapes here is mildly tricky. If the
3141                          * quote char is also the escape char, there's no problem - we
3142                          * just use the char as a toggle. If they are different, we need
3143                          * to ensure that we only take account of an escape inside a
3144                          * quoted field and immediately preceding a quote char, and not
3145                          * the second in a escape-escape sequence.
3146                          */
3147                         if (in_quote && c == escapec)
3148                                 last_was_esc = !last_was_esc;
3149                         if (c == quotec && !last_was_esc)
3150                                 in_quote = !in_quote;
3151                         if (c != escapec)
3152                                 last_was_esc = false;
3153
3154                         /*
3155                          * Updating the line count for embedded CR and/or LF chars is
3156                          * necessarily a little fragile - this test is probably about the
3157                          * best we can do.      (XXX it's arguable whether we should do this
3158                          * at all --- is cur_lineno a physical or logical count?)
3159                          */
3160                         if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
3161                                 cstate->cur_lineno++;
3162                 }
3163
3164                 /* Process \r */
3165                 if (c == '\r' && (!cstate->csv_mode || !in_quote))
3166                 {
3167                         /* Check for \r\n on first line, _and_ handle \r\n. */
3168                         if (cstate->eol_type == EOL_UNKNOWN ||
3169                                 cstate->eol_type == EOL_CRNL)
3170                         {
3171                                 /*
3172                                  * If need more data, go back to loop top to load it.
3173                                  *
3174                                  * Note that if we are at EOF, c will wind up as '\0' because
3175                                  * of the guaranteed pad of raw_buf.
3176                                  */
3177                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3178
3179                                 /* get next char */
3180                                 c = copy_raw_buf[raw_buf_ptr];
3181
3182                                 if (c == '\n')
3183                                 {
3184                                         raw_buf_ptr++;          /* eat newline */
3185                                         cstate->eol_type = EOL_CRNL;            /* in case not set yet */
3186                                 }
3187                                 else
3188                                 {
3189                                         /* found \r, but no \n */
3190                                         if (cstate->eol_type == EOL_CRNL)
3191                                                 ereport(ERROR,
3192                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3193                                                                  !cstate->csv_mode ?
3194                                                         errmsg("literal carriage return found in data") :
3195                                                         errmsg("unquoted carriage return found in data"),
3196                                                                  !cstate->csv_mode ?
3197                                                 errhint("Use \"\\r\" to represent carriage return.") :
3198                                                                  errhint("Use quoted CSV field to represent carriage return.")));
3199
3200                                         /*
3201                                          * if we got here, it is the first line and we didn't find
3202                                          * \n, so don't consume the peeked character
3203                                          */
3204                                         cstate->eol_type = EOL_CR;
3205                                 }
3206                         }
3207                         else if (cstate->eol_type == EOL_NL)
3208                                 ereport(ERROR,
3209                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3210                                                  !cstate->csv_mode ?
3211                                                  errmsg("literal carriage return found in data") :
3212                                                  errmsg("unquoted carriage return found in data"),
3213                                                  !cstate->csv_mode ?
3214                                            errhint("Use \"\\r\" to represent carriage return.") :
3215                                                  errhint("Use quoted CSV field to represent carriage return.")));
3216                         /* If reach here, we have found the line terminator */
3217                         break;
3218                 }
3219
3220                 /* Process \n */
3221                 if (c == '\n' && (!cstate->csv_mode || !in_quote))
3222                 {
3223                         if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
3224                                 ereport(ERROR,
3225                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3226                                                  !cstate->csv_mode ?
3227                                                  errmsg("literal newline found in data") :
3228                                                  errmsg("unquoted newline found in data"),
3229                                                  !cstate->csv_mode ?
3230                                                  errhint("Use \"\\n\" to represent newline.") :
3231                                          errhint("Use quoted CSV field to represent newline.")));
3232                         cstate->eol_type = EOL_NL;      /* in case not set yet */
3233                         /* If reach here, we have found the line terminator */
3234                         break;
3235                 }
3236
3237                 /*
3238                  * In CSV mode, we only recognize \. alone on a line.  This is because
3239                  * \. is a valid CSV data value.
3240                  */
3241                 if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
3242                 {
3243                         char            c2;
3244
3245                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3246                         IF_NEED_REFILL_AND_EOF_BREAK(0);
3247
3248                         /* -----
3249                          * get next character
3250                          * Note: we do not change c so if it isn't \., we can fall
3251                          * through and continue processing for file encoding.
3252                          * -----
3253                          */
3254                         c2 = copy_raw_buf[raw_buf_ptr];
3255
3256                         if (c2 == '.')
3257                         {
3258                                 raw_buf_ptr++;  /* consume the '.' */
3259
3260                                 /*
3261                                  * Note: if we loop back for more data here, it does not
3262                                  * matter that the CSV state change checks are re-executed; we
3263                                  * will come back here with no important state changed.
3264                                  */
3265                                 if (cstate->eol_type == EOL_CRNL)
3266                                 {
3267                                         /* Get the next character */
3268                                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3269                                         /* if hit_eof, c2 will become '\0' */
3270                                         c2 = copy_raw_buf[raw_buf_ptr++];
3271
3272                                         if (c2 == '\n')
3273                                         {
3274                                                 if (!cstate->csv_mode)
3275                                                         ereport(ERROR,
3276                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3277                                                                          errmsg("end-of-copy marker does not match previous newline style")));
3278                                                 else
3279                                                         NO_END_OF_COPY_GOTO;
3280                                         }
3281                                         else if (c2 != '\r')
3282                                         {
3283                                                 if (!cstate->csv_mode)
3284                                                         ereport(ERROR,
3285                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3286                                                                          errmsg("end-of-copy marker corrupt")));
3287                                                 else
3288                                                         NO_END_OF_COPY_GOTO;
3289                                         }
3290                                 }
3291
3292                                 /* Get the next character */
3293                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
3294                                 /* if hit_eof, c2 will become '\0' */
3295                                 c2 = copy_raw_buf[raw_buf_ptr++];
3296
3297                                 if (c2 != '\r' && c2 != '\n')
3298                                 {
3299                                         if (!cstate->csv_mode)
3300                                                 ereport(ERROR,
3301                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3302                                                                  errmsg("end-of-copy marker corrupt")));
3303                                         else
3304                                                 NO_END_OF_COPY_GOTO;
3305                                 }
3306
3307                                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
3308                                         (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
3309                                         (cstate->eol_type == EOL_CR && c2 != '\r'))
3310                                 {
3311                                         ereport(ERROR,
3312                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3313                                                          errmsg("end-of-copy marker does not match previous newline style")));
3314                                 }
3315
3316                                 /*
3317                                  * Transfer only the data before the \. into line_buf, then
3318                                  * discard the data and the \. sequence.
3319                                  */
3320                                 if (prev_raw_ptr > cstate->raw_buf_index)
3321                                         appendBinaryStringInfo(&cstate->line_buf,
3322                                                                          cstate->raw_buf + cstate->raw_buf_index,
3323                                                                            prev_raw_ptr - cstate->raw_buf_index);
3324                                 cstate->raw_buf_index = raw_buf_ptr;
3325                                 result = true;  /* report EOF */
3326                                 break;
3327                         }
3328                         else if (!cstate->csv_mode)
3329
3330                                 /*
3331                                  * If we are here, it means we found a backslash followed by
3332                                  * something other than a period.  In non-CSV mode, anything
3333                                  * after a backslash is special, so we skip over that second
3334                                  * character too.  If we didn't do that \\. would be
3335                                  * considered an eof-of copy, while in non-CSV mode it is a
3336                                  * literal backslash followed by a period.      In CSV mode,
3337                                  * backslashes are not special, so we want to process the
3338                                  * character after the backslash just like a normal character,
3339                                  * so we don't increment in those cases.
3340                                  */
3341                                 raw_buf_ptr++;
3342                 }
3343
3344                 /*
3345                  * This label is for CSV cases where \. appears at the start of a
3346                  * line, but there is more text after it, meaning it was a data value.
3347                  * We are more strict for \. in CSV mode because \. could be a data
3348                  * value, while in non-CSV mode, \. cannot be a data value.
3349                  */
3350 not_end_of_copy:
3351
3352                 /*
3353                  * Process all bytes of a multi-byte character as a group.
3354                  *
3355                  * We only support multi-byte sequences where the first byte has the
3356                  * high-bit set, so as an optimization we can avoid this block
3357                  * entirely if it is not set.
3358                  */
3359                 if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
3360                 {
3361                         int                     mblen;
3362
3363                         mblen_str[0] = c;
3364                         /* All our encodings only read the first byte to get the length */
3365                         mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
3366                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
3367                         IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
3368                         raw_buf_ptr += mblen - 1;
3369                 }
3370                 first_char_in_line = false;
3371         }                                                       /* end of outer loop */
3372
3373         /*
3374          * Transfer any still-uncopied data to line_buf.
3375          */
3376         REFILL_LINEBUF;
3377
3378         return result;
3379 }
3380
/*
 * GetDecimalFromHex
 *		Map one hexadecimal digit character to its numeric value.
 *
 * '0'..'9' yield 0..9; any other character is lower-cased and measured
 * against 'a', so 'a'..'f' and 'A'..'F' yield 10..15.  No validation is
 * done here: the result is only meaningful when the argument is a hex
 * digit, so callers are expected to have tested it with isxdigit() first.
 */
static int
GetDecimalFromHex(char hex)
{
	unsigned char digit = (unsigned char) hex;

	return isdigit(digit) ? hex - '0' : tolower(digit) - 'a' + 10;
}
3392
3393 /*
3394  * Parse the current line into separate attributes (fields),
3395  * performing de-escaping as needed.
3396  *
3397  * The input is in line_buf.  We use attribute_buf to hold the result
3398  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
3399  * string, or NULL when the input matches the null marker string.
3400  * This array is expanded as necessary.
3401  *
3402  * (Note that the caller cannot check for nulls since the returned
3403  * string would be the post-de-escaping equivalent, which may look
3404  * the same as some valid data string.)
3405  *
3406  * delim is the column delimiter string (must be just one byte for now).
3407  * null_print is the null marker string.  Note that this is compared to
3408  * the pre-de-escaped input string.
3409  *
3410  * The return value is the number of fields actually read.
3411  */
3412 static int
3413 CopyReadAttributesText(CopyState cstate)
3414 {
3415         char            delimc = cstate->delim[0];
3416         int                     fieldno;
3417         char       *output_ptr;
3418         char       *cur_ptr;
3419         char       *line_end_ptr;
3420
3421         /*
3422          * We need a special case for zero-column tables: check that the input
3423          * line is empty, and return.
3424          */
3425         if (cstate->max_fields <= 0)
3426         {
3427                 if (cstate->line_buf.len != 0)
3428                         ereport(ERROR,
3429                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3430                                          errmsg("extra data after last expected column")));
3431                 return 0;
3432         }
3433
3434         resetStringInfo(&cstate->attribute_buf);
3435
3436         /*
3437          * The de-escaped attributes will certainly not be longer than the input
3438          * data line, so we can just force attribute_buf to be large enough and
3439          * then transfer data without any checks for enough space.      We need to do
3440          * it this way because enlarging attribute_buf mid-stream would invalidate
3441          * pointers already stored into cstate->raw_fields[].
3442          */
3443         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3444                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3445         output_ptr = cstate->attribute_buf.data;
3446
3447         /* set pointer variables for loop */
3448         cur_ptr = cstate->line_buf.data;
3449         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3450
3451         /* Outer loop iterates over fields */
3452         fieldno = 0;
3453         for (;;)
3454         {
3455                 bool            found_delim = false;
3456                 char       *start_ptr;
3457                 char       *end_ptr;
3458                 int                     input_len;
3459                 bool            saw_non_ascii = false;
3460
3461                 /* Make sure there is enough space for the next value */
3462                 if (fieldno >= cstate->max_fields)
3463                 {
3464                         cstate->max_fields *= 2;
3465                         cstate->raw_fields =
3466                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3467                 }
3468
3469                 /* Remember start of field on both input and output sides */
3470                 start_ptr = cur_ptr;
3471                 cstate->raw_fields[fieldno] = output_ptr;
3472
3473                 /*
3474                  * Scan data for field.
3475                  *
3476                  * Note that in this loop, we are scanning to locate the end of field
3477                  * and also speculatively performing de-escaping.  Once we find the
3478                  * end-of-field, we can match the raw field contents against the null
3479                  * marker string.  Only after that comparison fails do we know that
3480                  * de-escaping is actually the right thing to do; therefore we *must
3481                  * not* throw any syntax errors before we've done the null-marker
3482                  * check.
3483                  */
3484                 for (;;)
3485                 {
3486                         char            c;
3487
3488                         end_ptr = cur_ptr;
3489                         if (cur_ptr >= line_end_ptr)
3490                                 break;
3491                         c = *cur_ptr++;
3492                         if (c == delimc)
3493                         {
3494                                 found_delim = true;
3495                                 break;
3496                         }
3497                         if (c == '\\')
3498                         {
3499                                 if (cur_ptr >= line_end_ptr)
3500                                         break;
3501                                 c = *cur_ptr++;
3502                                 switch (c)
3503                                 {
3504                                         case '0':
3505                                         case '1':
3506                                         case '2':
3507                                         case '3':
3508                                         case '4':
3509                                         case '5':
3510                                         case '6':
3511                                         case '7':
3512                                                 {
3513                                                         /* handle \013 */
3514                                                         int                     val;
3515
3516                                                         val = OCTVALUE(c);
3517                                                         if (cur_ptr < line_end_ptr)
3518                                                         {
3519                                                                 c = *cur_ptr;
3520                                                                 if (ISOCTAL(c))
3521                                                                 {
3522                                                                         cur_ptr++;
3523                                                                         val = (val << 3) + OCTVALUE(c);
3524                                                                         if (cur_ptr < line_end_ptr)
3525                                                                         {
3526                                                                                 c = *cur_ptr;
3527                                                                                 if (ISOCTAL(c))
3528                                                                                 {
3529                                                                                         cur_ptr++;
3530                                                                                         val = (val << 3) + OCTVALUE(c);
3531                                                                                 }
3532                                                                         }
3533                                                                 }
3534                                                         }
3535                                                         c = val & 0377;
3536                                                         if (c == '\0' || IS_HIGHBIT_SET(c))
3537                                                                 saw_non_ascii = true;
3538                                                 }
3539                                                 break;
3540                                         case 'x':
3541                                                 /* Handle \x3F */
3542                                                 if (cur_ptr < line_end_ptr)
3543                                                 {
3544                                                         char            hexchar = *cur_ptr;
3545
3546                                                         if (isxdigit((unsigned char) hexchar))
3547                                                         {
3548                                                                 int                     val = GetDecimalFromHex(hexchar);
3549
3550                                                                 cur_ptr++;
3551                                                                 if (cur_ptr < line_end_ptr)
3552                                                                 {
3553                                                                         hexchar = *cur_ptr;
3554                                                                         if (isxdigit((unsigned char) hexchar))
3555                                                                         {
3556                                                                                 cur_ptr++;
3557                                                                                 val = (val << 4) + GetDecimalFromHex(hexchar);
3558                                                                         }
3559                                                                 }
3560                                                                 c = val & 0xff;
3561                                                                 if (c == '\0' || IS_HIGHBIT_SET(c))
3562                                                                         saw_non_ascii = true;
3563                                                         }
3564                                                 }
3565                                                 break;
3566                                         case 'b':
3567                                                 c = '\b';
3568                                                 break;
3569                                         case 'f':
3570                                                 c = '\f';
3571                                                 break;
3572                                         case 'n':
3573                                                 c = '\n';
3574                                                 break;
3575                                         case 'r':
3576                                                 c = '\r';
3577                                                 break;
3578                                         case 't':
3579                                                 c = '\t';
3580                                                 break;
3581                                         case 'v':
3582                                                 c = '\v';
3583                                                 break;
3584
3585                                                 /*
3586                                                  * in all other cases, take the char after '\'
3587                                                  * literally
3588                                                  */
3589                                 }
3590                         }
3591
3592                         /* Add c to output string */
3593                         *output_ptr++ = c;
3594                 }
3595
3596                 /* Check whether raw input matched null marker */
3597                 input_len = end_ptr - start_ptr;
3598                 if (input_len == cstate->null_print_len &&
3599                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3600                         cstate->raw_fields[fieldno] = NULL;
3601                 else
3602                 {
3603                         /*
3604                          * At this point we know the field is supposed to contain data.
3605                          *
3606                          * If we de-escaped any non-7-bit-ASCII chars, make sure the
3607                          * resulting string is valid data for the db encoding.
3608                          */
3609                         if (saw_non_ascii)
3610                         {
3611                                 char       *fld = cstate->raw_fields[fieldno];
3612
3613                                 pg_verifymbstr(fld, output_ptr - fld, false);
3614                         }
3615                 }
3616
3617                 /* Terminate attribute value in output area */
3618                 *output_ptr++ = '\0';
3619
3620                 fieldno++;
3621                 /* Done if we hit EOL instead of a delim */
3622                 if (!found_delim)
3623                         break;
3624         }
3625
3626         /* Clean up state of attribute_buf */
3627         output_ptr--;
3628         Assert(*output_ptr == '\0');
3629         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3630
3631         return fieldno;
3632 }
3633
3634 /*
3635  * Parse the current line into separate attributes (fields),
3636  * performing de-escaping as needed.  This has exactly the same API as
3637  * CopyReadAttributesText, except we parse the fields according to
3638  * "standard" (i.e. common) CSV usage.
3639  */
3640 static int
3641 CopyReadAttributesCSV(CopyState cstate)
3642 {
3643         char            delimc = cstate->delim[0];
3644         char            quotec = cstate->quote[0];
3645         char            escapec = cstate->escape[0];
3646         int                     fieldno;
3647         char       *output_ptr;
3648         char       *cur_ptr;
3649         char       *line_end_ptr;
3650
3651         /*
3652          * We need a special case for zero-column tables: check that the input
3653          * line is empty, and return.
3654          */
3655         if (cstate->max_fields <= 0)
3656         {
3657                 if (cstate->line_buf.len != 0)
3658                         ereport(ERROR,
3659                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3660                                          errmsg("extra data after last expected column")));
3661                 return 0;
3662         }
3663
3664         resetStringInfo(&cstate->attribute_buf);
3665
3666         /*
3667          * The de-escaped attributes will certainly not be longer than the input
3668          * data line, so we can just force attribute_buf to be large enough and
3669          * then transfer data without any checks for enough space.      We need to do
3670          * it this way because enlarging attribute_buf mid-stream would invalidate
3671          * pointers already stored into cstate->raw_fields[].
3672          */
3673         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3674                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3675         output_ptr = cstate->attribute_buf.data;
3676
3677         /* set pointer variables for loop */
3678         cur_ptr = cstate->line_buf.data;
3679         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3680
3681         /* Outer loop iterates over fields */
3682         fieldno = 0;
3683         for (;;)
3684         {
3685                 bool            found_delim = false;
3686                 bool            saw_quote = false;
3687                 char       *start_ptr;
3688                 char       *end_ptr;
3689                 int                     input_len;
3690
3691                 /* Make sure there is enough space for the next value */
3692                 if (fieldno >= cstate->max_fields)
3693                 {
3694                         cstate->max_fields *= 2;
3695                         cstate->raw_fields =
3696                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3697                 }
3698
3699                 /* Remember start of field on both input and output sides */
3700                 start_ptr = cur_ptr;
3701                 cstate->raw_fields[fieldno] = output_ptr;
3702
3703                 /*
3704                  * Scan data for field,
3705                  *
3706                  * The loop starts in "not quote" mode and then toggles between that
3707                  * and "in quote" mode. The loop exits normally if it is in "not
3708                  * quote" mode and a delimiter or line end is seen.
3709                  */
3710                 for (;;)
3711                 {
3712                         char            c;
3713
3714                         /* Not in quote */
3715                         for (;;)
3716                         {
3717                                 end_ptr = cur_ptr;
3718                                 if (cur_ptr >= line_end_ptr)
3719                                         goto endfield;
3720                                 c = *cur_ptr++;
3721                                 /* unquoted field delimiter */
3722                                 if (c == delimc)
3723                                 {
3724                                         found_delim = true;
3725                                         goto endfield;
3726                                 }
3727                                 /* start of quoted field (or part of field) */
3728                                 if (c == quotec)
3729                                 {
3730                                         saw_quote = true;
3731                                         break;
3732                                 }
3733                                 /* Add c to output string */
3734                                 *output_ptr++ = c;
3735                         }
3736
3737                         /* In quote */
3738                         for (;;)
3739                         {
3740                                 end_ptr = cur_ptr;
3741                                 if (cur_ptr >= line_end_ptr)
3742                                         ereport(ERROR,
3743                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3744                                                          errmsg("unterminated CSV quoted field")));
3745
3746                                 c = *cur_ptr++;
3747
3748                                 /* escape within a quoted field */
3749                                 if (c == escapec)
3750                                 {
3751                                         /*
3752                                          * peek at the next char if available, and escape it if it
3753                                          * is an escape char or a quote char
3754                                          */
3755                                         if (cur_ptr < line_end_ptr)
3756                                         {
3757                                                 char            nextc = *cur_ptr;
3758
3759                                                 if (nextc == escapec || nextc == quotec)
3760                                                 {
3761                                                         *output_ptr++ = nextc;
3762                                                         cur_ptr++;
3763                                                         continue;
3764                                                 }
3765                                         }
3766                                 }
3767
3768                                 /*
3769                                  * end of quoted field. Must do this test after testing for
3770                                  * escape in case quote char and escape char are the same
3771                                  * (which is the common case).
3772                                  */
3773                                 if (c == quotec)
3774                                         break;
3775
3776                                 /* Add c to output string */
3777                                 *output_ptr++ = c;
3778                         }
3779                 }
3780 endfield:
3781
3782                 /* Terminate attribute value in output area */
3783                 *output_ptr++ = '\0';
3784
3785                 /* Check whether raw input matched null marker */
3786                 input_len = end_ptr - start_ptr;
3787                 if (!saw_quote && input_len == cstate->null_print_len &&
3788                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3789                         cstate->raw_fields[fieldno] = NULL;
3790
3791                 fieldno++;
3792                 /* Done if we hit EOL instead of a delim */
3793                 if (!found_delim)
3794                         break;
3795         }
3796
3797         /* Clean up state of attribute_buf */
3798         output_ptr--;
3799         Assert(*output_ptr == '\0');
3800         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3801
3802         return fieldno;
3803 }
3804
3805
3806 /*
3807  * Read a binary attribute
3808  */
3809 static Datum
3810 CopyReadBinaryAttribute(CopyState cstate,
3811                                                 int column_no, FmgrInfo *flinfo,
3812                                                 Oid typioparam, int32 typmod,
3813                                                 bool *isnull)
3814 {
3815         int32           fld_size;
3816         Datum           result;
3817
3818         if (!CopyGetInt32(cstate, &fld_size))
3819                 ereport(ERROR,
3820                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3821                                  errmsg("unexpected EOF in COPY data")));
3822         if (fld_size == -1)
3823         {
3824                 *isnull = true;
3825                 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
3826         }
3827         if (fld_size < 0)
3828                 ereport(ERROR,
3829                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3830                                  errmsg("invalid field size")));
3831
3832         /* reset attribute_buf to empty, and load raw data in it */
3833         resetStringInfo(&cstate->attribute_buf);
3834
3835         enlargeStringInfo(&cstate->attribute_buf, fld_size);
3836         if (CopyGetData(cstate, cstate->attribute_buf.data,
3837                                         fld_size, fld_size) != fld_size)
3838                 ereport(ERROR,
3839                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3840                                  errmsg("unexpected EOF in COPY data")));
3841
3842         cstate->attribute_buf.len = fld_size;
3843         cstate->attribute_buf.data[fld_size] = '\0';
3844
3845         /* Call the column type's binary input converter */
3846         result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
3847                                                                  typioparam, typmod);
3848
3849         /* Trouble if it didn't eat the whole buffer */
3850         if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
3851                 ereport(ERROR,
3852                                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
3853                                  errmsg("incorrect binary data format")));
3854
3855         *isnull = false;
3856         return result;
3857 }
3858
3859 /*
3860  * Send text representation of one attribute, with conversion and escaping
3861  */
/*
 * DUMPSOFAR
 *		Send the bytes in [start, ptr) with a single CopySendData() call,
 *		or do nothing if that range is empty.
 *
 * This is a helper for the attribute-output code and expands in place: it
 * relies on variables named "cstate", "start" and "ptr" being in scope where
 * it is used.  It does not modify "start" or "ptr" itself.
 */
#define DUMPSOFAR() \
	do { \
		if (start < ptr) \
			CopySendData(cstate, start, ptr - start); \
	} while (0)
3867
3868 static void
3869 CopyAttributeOutText(CopyState cstate, char *string)
3870 {
3871         char       *ptr;
3872         char       *start;
3873         char            c;
3874         char            delimc = cstate->delim[0];
3875
3876         if (cstate->need_transcoding)
3877                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
3878         else
3879                 ptr = string;
3880
3881         /*
3882          * We have to grovel through the string searching for control characters
3883          * and instances of the delimiter character.  In most cases, though, these
3884          * are infrequent.      To avoid overhead from calling CopySendData once per
3885          * character, we dump out all characters between escaped characters in a
3886          * single call.  The loop invariant is that the data from "start" to "ptr"
3887          * can be sent literally, but hasn't yet been.
3888          *
3889          * We can skip pg_encoding_mblen() overhead when encoding is safe, because
3890          * in valid backend encodings, extra bytes of a multibyte character never
3891          * look like ASCII.  This loop is sufficiently performance-critical that
3892          * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
3893          * of the normal safe-encoding path.
3894          */
3895         if (cstate->encoding_embeds_ascii)
3896         {
3897                 start = ptr;
3898                 while ((c = *ptr) != '\0')
3899                 {
3900                         if ((unsigned char) c < (unsigned char) 0x20)
3901                         {
3902                                 /*
3903                                  * \r and \n must be escaped, the others are traditional. We
3904                                  * prefer to dump these using the C-like notation, rather than
3905                                  * a backslash and the literal character, because it makes the
3906                                  * dump file a bit more proof against Microsoftish data
3907                                  * mangling.
3908                                  */
3909                                 switch (c)
3910                                 {
3911                                         case '\b':
3912                                                 c = 'b';
3913                                                 break;
3914                                         case '\f':
3915                                                 c = 'f';
3916                                                 break;
3917                                         case '\n':
3918                                                 c = 'n';
3919                                                 break;
3920                                         case '\r':
3921                                                 c = 'r';
3922                                                 break;
3923                                         case '\t':
3924                                                 c = 't';
3925                                                 break;
3926                                         case '\v':
3927                                                 c = 'v';
3928                                                 break;
3929                                         default:
3930                                                 /* If it's the delimiter, must backslash it */
3931                                                 if (c == delimc)
3932                                                         break;
3933                                                 /* All ASCII control chars are length 1 */
3934                                                 ptr++;
3935                                                 continue;               /* fall to end of loop */
3936                                 }
3937                                 /* if we get here, we need to convert the control char */
3938                                 DUMPSOFAR();
3939                                 CopySendChar(cstate, '\\');
3940                                 CopySendChar(cstate, c);
3941                                 start = ++ptr;  /* do not include char in next run */
3942                         }
3943                         else if (c == '\\' || c == delimc)
3944                         {
3945                                 DUMPSOFAR();
3946                                 CopySendChar(cstate, '\\');
3947                                 start = ptr++;  /* we include char in next run */
3948                         }
3949                         else if (IS_HIGHBIT_SET(c))
3950                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
3951                         else
3952                                 ptr++;
3953                 }
3954         }
3955         else
3956         {
3957                 start = ptr;
3958                 while ((c = *ptr) != '\0')
3959                 {
3960                         if ((unsigned char) c < (unsigned char) 0x20)
3961                         {
3962                                 /*
3963                                  * \r and \n must be escaped, the others are traditional. We
3964                                  * prefer to dump these using the C-like notation, rather than
3965                                  * a backslash and the literal character, because it makes the
3966                                  * dump file a bit more proof against Microsoftish data
3967                                  * mangling.
3968                                  */
3969                                 switch (c)
3970                                 {
3971                                         case '\b':
3972                                                 c = 'b';
3973                                                 break;
3974                                         case '\f':
3975                                                 c = 'f';
3976                                                 break;
3977                                         case '\n':
3978                                                 c = 'n';
3979                                                 break;
3980                                         case '\r':
3981                                                 c = 'r';
3982                                                 break;
3983                                         case '\t':
3984                                                 c = 't';
3985                                                 break;
3986                                         case '\v':
3987                                                 c = 'v';
3988                                                 break;
3989                                         default:
3990                                                 /* If it's the delimiter, must backslash it */
3991                                                 if (c == delimc)
3992                                                         break;
3993                                                 /* All ASCII control chars are length 1 */
3994                                                 ptr++;
3995                                                 continue;               /* fall to end of loop */
3996                                 }
3997                                 /* if we get here, we need to convert the control char */
3998                                 DUMPSOFAR();
3999                                 CopySendChar(cstate, '\\');
4000                                 CopySendChar(cstate, c);
4001                                 start = ++ptr;  /* do not include char in next run */
4002                         }
4003                         else if (c == '\\' || c == delimc)
4004                         {
4005                                 DUMPSOFAR();
4006                                 CopySendChar(cstate, '\\');
4007                                 start = ptr++;  /* we include char in next run */
4008                         }
4009                         else
4010                                 ptr++;
4011                 }
4012         }
4013
4014         DUMPSOFAR();
4015 }
4016
4017 /*
4018  * Send text representation of one attribute, with conversion and
4019  * CSV-style escaping
4020  */
4021 static void
4022 CopyAttributeOutCSV(CopyState cstate, char *string,
4023                                         bool use_quote, bool single_attr)
4024 {
4025         char       *ptr;
4026         char       *start;
4027         char            c;
4028         char            delimc = cstate->delim[0];
4029         char            quotec = cstate->quote[0];
4030         char            escapec = cstate->escape[0];
4031
4032         /* force quoting if it matches null_print (before conversion!) */
4033         if (!use_quote && strcmp(string, cstate->null_print) == 0)
4034                 use_quote = true;
4035
4036         if (cstate->need_transcoding)
4037                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
4038         else
4039                 ptr = string;
4040
4041         /*
4042          * Make a preliminary pass to discover if it needs quoting
4043          */
4044         if (!use_quote)
4045         {
4046                 /*
4047                  * Because '\.' can be a data value, quote it if it appears alone on a
4048                  * line so it is not interpreted as the end-of-data marker.
4049                  */
4050                 if (single_attr && strcmp(ptr, "\\.") == 0)
4051                         use_quote = true;
4052                 else
4053                 {
4054                         char       *tptr = ptr;
4055
4056                         while ((c = *tptr) != '\0')
4057                         {
4058                                 if (c == delimc || c == quotec || c == '\n' || c == '\r')
4059                                 {
4060                                         use_quote = true;
4061                                         break;
4062                                 }
4063                                 if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4064                                         tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
4065                                 else
4066                                         tptr++;
4067                         }
4068                 }
4069         }
4070
4071         if (use_quote)
4072         {
4073                 CopySendChar(cstate, quotec);
4074
4075                 /*
4076                  * We adopt the same optimization strategy as in CopyAttributeOutText
4077                  */
4078                 start = ptr;
4079                 while ((c = *ptr) != '\0')
4080                 {
4081                         if (c == quotec || c == escapec)
4082                         {
4083                                 DUMPSOFAR();
4084                                 CopySendChar(cstate, escapec);
4085                                 start = ptr;    /* we include char in next run */
4086                         }
4087                         if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4088                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
4089                         else
4090                                 ptr++;
4091                 }
4092                 DUMPSOFAR();
4093
4094                 CopySendChar(cstate, quotec);
4095         }
4096         else
4097         {
4098                 /* If it doesn't need quoting, we can just dump it as-is */
4099                 CopySendString(cstate, ptr);
4100         }
4101 }
4102
4103 /*
4104  * CopyGetAttnums - build an integer list of attnums to be copied
4105  *
4106  * The input attnamelist is either the user-specified column list,
4107  * or NIL if there was none (in which case we want all the non-dropped
4108  * columns).
4109  *
4110  * rel can be NULL ... it's only used for error reports.
4111  */
4112 static List *
4113 CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
4114 {
4115         List       *attnums = NIL;
4116
4117         if (attnamelist == NIL)
4118         {
4119                 /* Generate default column list */
4120                 Form_pg_attribute *attr = tupDesc->attrs;
4121                 int                     attr_count = tupDesc->natts;
4122                 int                     i;
4123
4124                 for (i = 0; i < attr_count; i++)
4125                 {
4126                         if (attr[i]->attisdropped)
4127                                 continue;
4128                         attnums = lappend_int(attnums, i + 1);
4129                 }
4130         }
4131         else
4132         {
4133                 /* Validate the user-supplied list and extract attnums */
4134                 ListCell   *l;
4135
4136                 foreach(l, attnamelist)
4137                 {
4138                         char       *name = strVal(lfirst(l));
4139                         int                     attnum;
4140                         int                     i;
4141
4142                         /* Lookup column name */
4143                         attnum = InvalidAttrNumber;
4144                         for (i = 0; i < tupDesc->natts; i++)
4145                         {
4146                                 if (tupDesc->attrs[i]->attisdropped)
4147                                         continue;
4148                                 if (namestrcmp(&(tupDesc->attrs[i]->attname), name) == 0)
4149                                 {
4150                                         attnum = tupDesc->attrs[i]->attnum;
4151                                         break;
4152                                 }
4153                         }
4154                         if (attnum == InvalidAttrNumber)
4155                         {
4156                                 if (rel != NULL)
4157                                         ereport(ERROR,
4158                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4159                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
4160                                                    name, RelationGetRelationName(rel))));
4161                                 else
4162                                         ereport(ERROR,
4163                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4164                                                          errmsg("column \"%s\" does not exist",
4165                                                                         name)));
4166                         }
4167                         /* Check for duplicates */
4168                         if (list_member_int(attnums, attnum))
4169                                 ereport(ERROR,
4170                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
4171                                                  errmsg("column \"%s\" specified more than once",
4172                                                                 name)));
4173                         attnums = lappend_int(attnums, attnum);
4174                 }
4175         }
4176
4177         return attnums;
4178 }
4179
4180
4181 /*
4182  * copy_dest_startup --- executor startup
4183  */
4184 static void
4185 copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
4186 {
4187         /* no-op */
4188 }
4189
4190 /*
4191  * copy_dest_receive --- receive one tuple
4192  */
4193 static void
4194 copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
4195 {
4196         DR_copy    *myState = (DR_copy *) self;
4197         CopyState       cstate = myState->cstate;
4198
4199         /* Make sure the tuple is fully deconstructed */
4200         slot_getallattrs(slot);
4201
4202         /* And send the data */
4203         CopyOneRowTo(cstate, InvalidOid, slot->tts_values, slot->tts_isnull);
4204         myState->processed++;
4205 }
4206
4207 /*
4208  * copy_dest_shutdown --- executor end
4209  */
4210 static void
4211 copy_dest_shutdown(DestReceiver *self)
4212 {
4213         /* no-op */
4214 }
4215
4216 /*
4217  * copy_dest_destroy --- release DestReceiver object
4218  */
4219 static void
4220 copy_dest_destroy(DestReceiver *self)
4221 {
4222         pfree(self);
4223 }
4224
4225 /*
4226  * CreateCopyDestReceiver -- create a suitable DestReceiver object
4227  */
4228 DestReceiver *
4229 CreateCopyDestReceiver(void)
4230 {
4231         DR_copy    *self = (DR_copy *) palloc(sizeof(DR_copy));
4232
4233         self->pub.receiveSlot = copy_dest_receive;
4234         self->pub.rStartup = copy_dest_startup;
4235         self->pub.rShutdown = copy_dest_shutdown;
4236         self->pub.rDestroy = copy_dest_destroy;
4237         self->pub.mydest = DestCopyOut;
4238
4239         self->cstate = NULL;            /* will be set later */
4240         self->processed = 0;
4241
4242         return (DestReceiver *) self;
4243 }