granicus.if.org Git - postgresql/blob - src/backend/commands/copy.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * copy.c
   4  *              Implements the COPY utility command
   5  *
   6  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/commands/copy.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18 #include <unistd.h>
  19 #include <sys/stat.h>
  20 #include <netinet/in.h>
  21 #include <arpa/inet.h>
  22
  23 #include "access/heapam.h"
  24 #include "access/htup_details.h"
  25 #include "access/sysattr.h"
  26 #include "access/xact.h"
  27 #include "catalog/namespace.h"
  28 #include "catalog/pg_type.h"
  29 #include "commands/copy.h"
  30 #include "commands/defrem.h"
  31 #include "commands/trigger.h"
  32 #include "executor/executor.h"
  33 #include "libpq/libpq.h"
  34 #include "libpq/pqformat.h"
  35 #include "mb/pg_wchar.h"
  36 #include "miscadmin.h"
  37 #include "optimizer/clauses.h"
  38 #include "optimizer/planner.h"
  39 #include "parser/parse_relation.h"
  40 #include "rewrite/rewriteHandler.h"
  41 #include "storage/fd.h"
  42 #include "tcop/tcopprot.h"
  43 #include "utils/acl.h"
  44 #include "utils/builtins.h"
  45 #include "utils/lsyscache.h"
  46 #include "utils/memutils.h"
  47 #include "utils/portal.h"
  48 #include "utils/rel.h"
  49 #include "utils/snapmgr.h"
  50
  51
  52 #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
  53 #define OCTVALUE(c) ((c) - '0')
  54
  55 /*
  56  * Represents the different source/dest cases we need to worry about at
  57  * the bottom level
  58  */
  59 typedef enum CopyDest
  60 {
  61         COPY_FILE,                                      /* to/from file */
  62         COPY_OLD_FE,                            /* to/from frontend (2.0 protocol) */
  63         COPY_NEW_FE                                     /* to/from frontend (3.0 protocol) */
  64 } CopyDest;
  65
  66 /*
  67  *      Represents the end-of-line terminator type of the input
  68  */
  69 typedef enum EolType
  70 {
  71         EOL_UNKNOWN,
  72         EOL_NL,
  73         EOL_CR,
  74         EOL_CRNL
  75 } EolType;
  76
  77 /*
  78  * This struct contains all the state variables used throughout a COPY
  79  * operation. For simplicity, we use the same struct for all variants of COPY,
  80  * even though some fields are used in only some cases.
  81  *
  82  * Multi-byte encodings: all supported client-side encodings encode multi-byte
  83  * characters by having the first byte's high bit set. Subsequent bytes of the
  84  * character can have the high bit not set. When scanning data in such an
  85  * encoding to look for a match to a single-byte (ie ASCII) character, we must
  86  * use the full pg_encoding_mblen() machinery to skip over multibyte
  87  * characters, else we might find a false match to a trailing byte. In
  88  * supported server encodings, there is no possibility of a false match, and
  89  * it's faster to make useless comparisons to trailing bytes than it is to
  90  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
  91  * when we have to do it the hard way.
  92  */
  93 typedef struct CopyStateData
  94 {
  95         /* low-level state data */
  96         CopyDest        copy_dest;              /* type of copy source/destination */
  97         FILE       *copy_file;          /* used if copy_dest == COPY_FILE */
  98         StringInfo      fe_msgbuf;              /* used for all dests during COPY TO, only for
  99                                                                  * dest == COPY_NEW_FE in COPY FROM */
 100         bool            fe_eof;                 /* true if detected end of copy data */
 101         EolType         eol_type;               /* EOL type of input */
 102         int                     file_encoding;  /* file or remote side's character encoding */
 103         bool            need_transcoding;               /* file encoding diff from server? */
 104         bool            encoding_embeds_ascii;  /* ASCII can be non-first byte? */
 105
 106         /* parameters from the COPY command */
 107         Relation        rel;                    /* relation to copy to or from */
 108         QueryDesc  *queryDesc;          /* executable query to copy from */
 109         List       *attnumlist;         /* integer list of attnums to copy */
 110         char       *filename;           /* filename, or NULL for STDIN/STDOUT */
 111         bool            binary;                 /* binary format? */
 112         bool            oids;                   /* include OIDs? */
 113         bool            freeze;                 /* freeze rows on loading? */
 114         bool            csv_mode;               /* Comma Separated Value format? */
 115         bool            header_line;    /* CSV header line? */
 116         char       *null_print;         /* NULL marker string (server encoding!) */
 117         int                     null_print_len; /* length of same */
 118         char       *null_print_client;          /* same converted to file encoding */
 119         char       *delim;                      /* column delimiter (must be 1 byte) */
 120         char       *quote;                      /* CSV quote char (must be 1 byte) */
 121         char       *escape;                     /* CSV escape char (must be 1 byte) */
 122         List       *force_quote;        /* list of column names */
 123         bool            force_quote_all;        /* FORCE QUOTE *? */
 124         bool       *force_quote_flags;          /* per-column CSV FQ flags */
 125         List       *force_notnull;      /* list of column names */
 126         bool       *force_notnull_flags;        /* per-column CSV FNN flags */
 127         bool            convert_selectively;    /* do selective binary conversion? */
 128         List       *convert_select;     /* list of column names (can be NIL) */
 129         bool       *convert_select_flags;       /* per-column CSV/TEXT CS flags */
 130
 131         /* these are just for error messages, see CopyFromErrorCallback */
 132         const char *cur_relname;        /* table name for error messages */
 133         int                     cur_lineno;             /* line number for error messages */
 134         const char *cur_attname;        /* current att for error messages */
 135         const char *cur_attval;         /* current att value for error messages */
 136
 137         /*
 138          * Working state for COPY TO/FROM
 139          */
 140         MemoryContext copycontext;      /* per-copy execution context */
 141
 142         /*
 143          * Working state for COPY TO
 144          */
 145         FmgrInfo   *out_functions;      /* lookup info for output functions */
 146         MemoryContext rowcontext;       /* per-row evaluation context */
 147
 148         /*
 149          * Working state for COPY FROM
 150          */
 151         AttrNumber      num_defaults;
 152         bool            file_has_oids;
 153         FmgrInfo        oid_in_function;
 154         Oid                     oid_typioparam;
 155         FmgrInfo   *in_functions;       /* array of input functions for each attrs */
 156         Oid                *typioparams;        /* array of element types for in_functions */
 157         int                *defmap;                     /* array of default att numbers */
 158         ExprState **defexprs;           /* array of default att expressions */
 159         bool            volatile_defexprs;              /* is any of defexprs volatile? */
 160
 161         /*
 162          * These variables are used to reduce overhead in textual COPY FROM.
 163          *
 164          * attribute_buf holds the separated, de-escaped text for each field of
 165          * the current line.  The CopyReadAttributes functions return arrays of
 166          * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
 167          * the buffer on each cycle.
 168          */
 169         StringInfoData attribute_buf;
 170
 171         /* field raw data pointers found by COPY FROM */
 172
 173         int                     max_fields;
 174         char      **raw_fields;
 175
 176         /*
 177          * Similarly, line_buf holds the whole input line being processed. The
 178          * input cycle is first to read the whole line into line_buf, convert it
 179          * to server encoding there, and then extract the individual attribute
 180          * fields into attribute_buf.  line_buf is preserved unmodified so that we
 181          * can display it in error messages if appropriate.
 182          */
 183         StringInfoData line_buf;
 184         bool            line_buf_converted;             /* converted to server encoding? */
 185
 186         /*
 187          * Finally, raw_buf holds raw data read from the data source (file or
 188          * client connection).  CopyReadLine parses this data sufficiently to
 189          * locate line boundaries, then transfers the data to line_buf and
 190          * converts it.  Note: we guarantee that there is a \0 at
 191          * raw_buf[raw_buf_len].
 192          */
 193 #define RAW_BUF_SIZE 65536              /* we palloc RAW_BUF_SIZE+1 bytes */
 194         char       *raw_buf;
 195         int                     raw_buf_index;  /* next byte to process */
 196         int                     raw_buf_len;    /* total # of bytes stored */
 197 } CopyStateData;
 198
 199 /* DestReceiver for COPY (SELECT) TO */
 200 typedef struct
 201 {
 202         DestReceiver pub;                       /* publicly-known function pointers */
 203         CopyState       cstate;                 /* CopyStateData for the command */
 204         uint64          processed;              /* # of tuples processed */
 205 } DR_copy;
 206
 207
 208 /*
 209  * These macros centralize code used to process line_buf and raw_buf buffers.
 210  * They are macros because they often do continue/break control and to avoid
 211  * function call overhead in tight COPY loops.
 212  *
 213  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
 214  * prevent the continue/break processing from working.  We end the "if (1)"
 215  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
 216  * any "else" in the calling code, and to avoid any compiler warnings about
 217  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
 218  */
 219
 220 /*
 221  * This keeps the character read at the top of the loop in the buffer
 222  * even if there is more than one read-ahead.
 223  */
 224 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
 225 if (1) \
 226 { \
 227         if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
 228         { \
 229                 raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
 230                 need_data = true; \
 231                 continue; \
 232         } \
 233 } else ((void) 0)
 234
 235 /* This consumes the remainder of the buffer and breaks */
 236 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
 237 if (1) \
 238 { \
 239         if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
 240         { \
 241                 if (extralen) \
 242                         raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
 243                 /* backslash just before EOF, treat as data char */ \
 244                 result = true; \
 245                 break; \
 246         } \
 247 } else ((void) 0)
 248
 249 /*
 250  * Transfer any approved data to line_buf; must do this to be sure
 251  * there is some room in raw_buf.
 252  */
 253 #define REFILL_LINEBUF \
 254 if (1) \
 255 { \
 256         if (raw_buf_ptr > cstate->raw_buf_index) \
 257         { \
 258                 appendBinaryStringInfo(&cstate->line_buf, \
 259                                                          cstate->raw_buf + cstate->raw_buf_index, \
 260                                                            raw_buf_ptr - cstate->raw_buf_index); \
 261                 cstate->raw_buf_index = raw_buf_ptr; \
 262         } \
 263 } else ((void) 0)
 264
 265 /* Undo any read-ahead and jump out of the block. */
 266 #define NO_END_OF_COPY_GOTO \
 267 if (1) \
 268 { \
 269         raw_buf_ptr = prev_raw_ptr + 1; \
 270         goto not_end_of_copy; \
 271 } else ((void) 0)
 272
 273 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 274
 275
 276 /* non-export function prototypes */
 277 static CopyState BeginCopy(bool is_from, Relation rel, Node *raw_query,
 278                   const char *queryString, List *attnamelist, List *options);
 279 static void EndCopy(CopyState cstate);
 280 static CopyState BeginCopyTo(Relation rel, Node *query, const char *queryString,
 281                         const char *filename, List *attnamelist, List *options);
 282 static void EndCopyTo(CopyState cstate);
 283 static uint64 DoCopyTo(CopyState cstate);
 284 static uint64 CopyTo(CopyState cstate);
 285 static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
 286                          Datum *values, bool *nulls);
 287 static uint64 CopyFrom(CopyState cstate);
 288 static void CopyFromInsertBatch(CopyState cstate, EState *estate,
 289                                         CommandId mycid, int hi_options,
 290                                         ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
 291                                         BulkInsertState bistate,
 292                                         int nBufferedTuples, HeapTuple *bufferedTuples);
 293 static bool CopyReadLine(CopyState cstate);
 294 static bool CopyReadLineText(CopyState cstate);
 295 static int      CopyReadAttributesText(CopyState cstate);
 296 static int      CopyReadAttributesCSV(CopyState cstate);
 297 static Datum CopyReadBinaryAttribute(CopyState cstate,
 298                                                 int column_no, FmgrInfo *flinfo,
 299                                                 Oid typioparam, int32 typmod,
 300                                                 bool *isnull);
 301 static void CopyAttributeOutText(CopyState cstate, char *string);
 302 static void CopyAttributeOutCSV(CopyState cstate, char *string,
 303                                         bool use_quote, bool single_attr);
 304 static List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
 305                            List *attnamelist);
 306 static char *limit_printout_length(const char *str);
 307
 308 /* Low-level communications functions */
 309 static void SendCopyBegin(CopyState cstate);
 310 static void ReceiveCopyBegin(CopyState cstate);
 311 static void SendCopyEnd(CopyState cstate);
 312 static void CopySendData(CopyState cstate, const void *databuf, int datasize);
 313 static void CopySendString(CopyState cstate, const char *str);
 314 static void CopySendChar(CopyState cstate, char c);
 315 static void CopySendEndOfRow(CopyState cstate);
 316 static int CopyGetData(CopyState cstate, void *databuf,
 317                         int minread, int maxread);
 318 static void CopySendInt32(CopyState cstate, int32 val);
 319 static bool CopyGetInt32(CopyState cstate, int32 *val);
 320 static void CopySendInt16(CopyState cstate, int16 val);
 321 static bool CopyGetInt16(CopyState cstate, int16 *val);
 322
 323
 324 /*
 325  * Send copy start/stop messages for frontend copies.  These have changed
 326  * in past protocol redesigns.
 327  */
 328 static void
 329 SendCopyBegin(CopyState cstate)
 330 {
 331         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
 332         {
 333                 /* new way */
 334                 StringInfoData buf;
 335                 int                     natts = list_length(cstate->attnumlist);
 336                 int16           format = (cstate->binary ? 1 : 0);
 337                 int                     i;
 338
 339                 pq_beginmessage(&buf, 'H');
 340                 pq_sendbyte(&buf, format);              /* overall format */
 341                 pq_sendint(&buf, natts, 2);
 342                 for (i = 0; i < natts; i++)
 343                         pq_sendint(&buf, format, 2);            /* per-column formats */
 344                 pq_endmessage(&buf);
 345                 cstate->copy_dest = COPY_NEW_FE;
 346         }
 347         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 348         {
 349                 /* old way */
 350                 if (cstate->binary)
 351                         ereport(ERROR,
 352                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 353                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 354                 pq_putemptymessage('H');
 355                 /* grottiness needed for old COPY OUT protocol */
 356                 pq_startcopyout();
 357                 cstate->copy_dest = COPY_OLD_FE;
 358         }
 359         else
 360         {
 361                 /* very old way */
 362                 if (cstate->binary)
 363                         ereport(ERROR,
 364                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 365                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 366                 pq_putemptymessage('B');
 367                 /* grottiness needed for old COPY OUT protocol */
 368                 pq_startcopyout();
 369                 cstate->copy_dest = COPY_OLD_FE;
 370         }
 371 }
 372
 373 static void
 374 ReceiveCopyBegin(CopyState cstate)
 375 {
 376         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
 377         {
 378                 /* new way */
 379                 StringInfoData buf;
 380                 int                     natts = list_length(cstate->attnumlist);
 381                 int16           format = (cstate->binary ? 1 : 0);
 382                 int                     i;
 383
 384                 pq_beginmessage(&buf, 'G');
 385                 pq_sendbyte(&buf, format);              /* overall format */
 386                 pq_sendint(&buf, natts, 2);
 387                 for (i = 0; i < natts; i++)
 388                         pq_sendint(&buf, format, 2);            /* per-column formats */
 389                 pq_endmessage(&buf);
 390                 cstate->copy_dest = COPY_NEW_FE;
 391                 cstate->fe_msgbuf = makeStringInfo();
 392         }
 393         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 394         {
 395                 /* old way */
 396                 if (cstate->binary)
 397                         ereport(ERROR,
 398                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 399                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 400                 pq_putemptymessage('G');
 401                 cstate->copy_dest = COPY_OLD_FE;
 402         }
 403         else
 404         {
 405                 /* very old way */
 406                 if (cstate->binary)
 407                         ereport(ERROR,
 408                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 409                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 410                 pq_putemptymessage('D');
 411                 cstate->copy_dest = COPY_OLD_FE;
 412         }
 413         /* We *must* flush here to ensure FE knows it can send. */
 414         pq_flush();
 415 }
 416
 417 static void
 418 SendCopyEnd(CopyState cstate)
 419 {
 420         if (cstate->copy_dest == COPY_NEW_FE)
 421         {
 422                 /* Shouldn't have any unsent data */
 423                 Assert(cstate->fe_msgbuf->len == 0);
 424                 /* Send Copy Done message */
 425                 pq_putemptymessage('c');
 426         }
 427         else
 428         {
 429                 CopySendData(cstate, "\\.", 2);
 430                 /* Need to flush out the trailer (this also appends a newline) */
 431                 CopySendEndOfRow(cstate);
 432                 pq_endcopyout(false);
 433         }
 434 }
 435
 436 /*----------
 437  * CopySendData sends output data to the destination (file or frontend)
 438  * CopySendString does the same for null-terminated strings
 439  * CopySendChar does the same for single characters
 440  * CopySendEndOfRow does the appropriate thing at end of each data row
 441  *      (data is not actually flushed except by CopySendEndOfRow)
 442  *
 443  * NB: no data conversion is applied by these functions
 444  *----------
 445  */
 446 static void
 447 CopySendData(CopyState cstate, const void *databuf, int datasize)
 448 {
 449         appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
 450 }
 451
 452 static void
 453 CopySendString(CopyState cstate, const char *str)
 454 {
 455         appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
 456 }
 457
 458 static void
 459 CopySendChar(CopyState cstate, char c)
 460 {
 461         appendStringInfoCharMacro(cstate->fe_msgbuf, c);
 462 }
 463
 464 static void
 465 CopySendEndOfRow(CopyState cstate)
 466 {
 467         StringInfo      fe_msgbuf = cstate->fe_msgbuf;
 468
 469         switch (cstate->copy_dest)
 470         {
 471                 case COPY_FILE:
 472                         if (!cstate->binary)
 473                         {
 474                                 /* Default line termination depends on platform */
 475 #ifndef WIN32
 476                                 CopySendChar(cstate, '\n');
 477 #else
 478                                 CopySendString(cstate, "\r\n");
 479 #endif
 480                         }
 481
 482                         if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
 483                                            cstate->copy_file) != 1 ||
 484                                 ferror(cstate->copy_file))
 485                                 ereport(ERROR,
 486                                                 (errcode_for_file_access(),
 487                                                  errmsg("could not write to COPY file: %m")));
 488                         break;
 489                 case COPY_OLD_FE:
 490                         /* The FE/BE protocol uses \n as newline for all platforms */
 491                         if (!cstate->binary)
 492                                 CopySendChar(cstate, '\n');
 493
 494                         if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
 495                         {
 496                                 /* no hope of recovering connection sync, so FATAL */
 497                                 ereport(FATAL,
 498                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 499                                                  errmsg("connection lost during COPY to stdout")));
 500                         }
 501                         break;
 502                 case COPY_NEW_FE:
 503                         /* The FE/BE protocol uses \n as newline for all platforms */
 504                         if (!cstate->binary)
 505                                 CopySendChar(cstate, '\n');
 506
 507                         /* Dump the accumulated row as one CopyData message */
 508                         (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
 509                         break;
 510         }
 511
 512         resetStringInfo(fe_msgbuf);
 513 }
 514
 515 /*
 516  * CopyGetData reads data from the source (file or frontend)
 517  *
 518  * We attempt to read at least minread, and at most maxread, bytes from
 519  * the source.  The actual number of bytes read is returned; if this is
 520  * less than minread, EOF was detected.
 521  *
 522  * Note: when copying from the frontend, we expect a proper EOF mark per
 523  * protocol; if the frontend simply drops the connection, we raise error.
 524  * It seems unwise to allow the COPY IN to complete normally in that case.
 525  *
 526  * NB: no data conversion is applied here.
 527  */
 528 static int
 529 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
 530 {
 531         int                     bytesread = 0;
 532
 533         switch (cstate->copy_dest)
 534         {
 535                 case COPY_FILE:
 536                         bytesread = fread(databuf, 1, maxread, cstate->copy_file);
 537                         if (ferror(cstate->copy_file))
 538                                 ereport(ERROR,
 539                                                 (errcode_for_file_access(),
 540                                                  errmsg("could not read from COPY file: %m")));
 541                         break;
 542                 case COPY_OLD_FE:
 543
 544                         /*
 545                          * We cannot read more than minread bytes (which in practice is 1)
 546                          * because old protocol doesn't have any clear way of separating
 547                          * the COPY stream from following data.  This is slow, but not any
 548                          * slower than the code path was originally, and we don't care
 549                          * much anymore about the performance of old protocol.
 550                          */
 551                         if (pq_getbytes((char *) databuf, minread))
 552                         {
 553                                 /* Only a \. terminator is legal EOF in old protocol */
 554                                 ereport(ERROR,
 555                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 556                                                  errmsg("unexpected EOF on client connection with an open transaction")));
 557                         }
 558                         bytesread = minread;
 559                         break;
 560                 case COPY_NEW_FE:
 561                         while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
 562                         {
 563                                 int                     avail;
 564
 565                                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
 566                                 {
 567                                         /* Try to receive another message */
 568                                         int                     mtype;
 569
 570                         readmessage:
 571                                         mtype = pq_getbyte();
 572                                         if (mtype == EOF)
 573                                                 ereport(ERROR,
 574                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 575                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
 576                                         if (pq_getmessage(cstate->fe_msgbuf, 0))
 577                                                 ereport(ERROR,
 578                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 579                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
 580                                         switch (mtype)
 581                                         {
 582                                                 case 'd':               /* CopyData */
 583                                                         break;
 584                                                 case 'c':               /* CopyDone */
 585                                                         /* COPY IN correctly terminated by frontend */
 586                                                         cstate->fe_eof = true;
 587                                                         return bytesread;
 588                                                 case 'f':               /* CopyFail */
 589                                                         ereport(ERROR,
 590                                                                         (errcode(ERRCODE_QUERY_CANCELED),
 591                                                                          errmsg("COPY from stdin failed: %s",
 592                                                                            pq_getmsgstring(cstate->fe_msgbuf))));
 593                                                         break;
 594                                                 case 'H':               /* Flush */
 595                                                 case 'S':               /* Sync */
 596
 597                                                         /*
 598                                                          * Ignore Flush/Sync for the convenience of client
 599                                                          * libraries (such as libpq) that may send those
 600                                                          * without noticing that the command they just
 601                                                          * sent was COPY.
 602                                                          */
 603                                                         goto readmessage;
 604                                                 default:
 605                                                         ereport(ERROR,
 606                                                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
 607                                                                          errmsg("unexpected message type 0x%02X during COPY from stdin",
 608                                                                                         mtype)));
 609                                                         break;
 610                                         }
 611                                 }
 612                                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
 613                                 if (avail > maxread)
 614                                         avail = maxread;
 615                                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
 616                                 databuf = (void *) ((char *) databuf + avail);
 617                                 maxread -= avail;
 618                                 bytesread += avail;
 619                         }
 620                         break;
 621         }
 622
 623         return bytesread;
 624 }
 625
 626
 627 /*
 628  * These functions do apply some data conversion
 629  */
 630
 631 /*
 632  * CopySendInt32 sends an int32 in network byte order
 633  */
 634 static void
 635 CopySendInt32(CopyState cstate, int32 val)
 636 {
 637         uint32          buf;
 638
 639         buf = htonl((uint32) val);
 640         CopySendData(cstate, &buf, sizeof(buf));
 641 }
 642
 643 /*
 644  * CopyGetInt32 reads an int32 that appears in network byte order
 645  *
 646  * Returns true if OK, false if EOF
 647  */
 648 static bool
 649 CopyGetInt32(CopyState cstate, int32 *val)
 650 {
 651         uint32          buf;
 652
 653         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
 654         {
 655                 *val = 0;                               /* suppress compiler warning */
 656                 return false;
 657         }
 658         *val = (int32) ntohl(buf);
 659         return true;
 660 }
 661
 662 /*
 663  * CopySendInt16 sends an int16 in network byte order
 664  */
 665 static void
 666 CopySendInt16(CopyState cstate, int16 val)
 667 {
 668         uint16          buf;
 669
 670         buf = htons((uint16) val);
 671         CopySendData(cstate, &buf, sizeof(buf));
 672 }
 673
 674 /*
 675  * CopyGetInt16 reads an int16 that appears in network byte order
 676  */
 677 static bool
 678 CopyGetInt16(CopyState cstate, int16 *val)
 679 {
 680         uint16          buf;
 681
 682         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
 683         {
 684                 *val = 0;                               /* suppress compiler warning */
 685                 return false;
 686         }
 687         *val = (int16) ntohs(buf);
 688         return true;
 689 }
 690
 691
 692 /*
 693  * CopyLoadRawBuf loads some more data into raw_buf
 694  *
 695  * Returns TRUE if able to obtain at least one more byte, else FALSE.
 696  *
 697  * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
 698  * down to the start of the buffer and then we load more data after that.
 699  * This case is used only when a frontend multibyte character crosses a
 700  * bufferload boundary.
 701  */
 702 static bool
 703 CopyLoadRawBuf(CopyState cstate)
 704 {
 705         int                     nbytes;
 706         int                     inbytes;
 707
 708         if (cstate->raw_buf_index < cstate->raw_buf_len)
 709         {
 710                 /* Copy down the unprocessed data */
 711                 nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
 712                 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
 713                                 nbytes);
 714         }
 715         else
 716                 nbytes = 0;                             /* no data need be saved */
 717
 718         inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
 719                                                   1, RAW_BUF_SIZE - nbytes);
 720         nbytes += inbytes;
 721         cstate->raw_buf[nbytes] = '\0';
 722         cstate->raw_buf_index = 0;
 723         cstate->raw_buf_len = nbytes;
 724         return (inbytes > 0);
 725 }
 726
 727
 728 /*
 729  *       DoCopy executes the SQL COPY statement
 730  *
 731  * Either unload or reload contents of table <relation>, depending on <from>.
 732  * (<from> = TRUE means we are inserting into the table.)  In the "TO" case
 733  * we also support copying the output of an arbitrary SELECT query.
 734  *
 735  * If <pipe> is false, transfer is between the table and the file named
 736  * <filename>.  Otherwise, transfer is between the table and our regular
 737  * input/output stream. The latter could be either stdin/stdout or a
 738  * socket, depending on whether we're running under Postmaster control.
 739  *
 740  * Do not allow a Postgres user without superuser privilege to read from
 741  * or write to a file.
 742  *
 743  * Do not allow the copy if user doesn't have proper permission to access
 744  * the table or the specifically requested columns.
 745  */
 746 Oid
 747 DoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed)
 748 {
 749         CopyState       cstate;
 750         bool            is_from = stmt->is_from;
 751         bool            pipe = (stmt->filename == NULL);
 752         Relation        rel;
 753         Oid         relid;
 754
 755         /* Disallow file COPY except to superusers. */
 756         if (!pipe && !superuser())
 757                 ereport(ERROR,
 758                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 759                                  errmsg("must be superuser to COPY to or from a file"),
 760                                  errhint("Anyone can COPY to stdout or from stdin. "
 761                                                  "psql's \\copy command also works for anyone.")));
 762
 763         if (stmt->relation)
 764         {
 765                 TupleDesc       tupDesc;
 766                 AclMode         required_access = (is_from ? ACL_INSERT : ACL_SELECT);
 767                 RangeTblEntry *rte;
 768                 List       *attnums;
 769                 ListCell   *cur;
 770
 771                 Assert(!stmt->query);
 772
 773                 /* Open and lock the relation, using the appropriate lock type. */
 774                 rel = heap_openrv(stmt->relation,
 775                                                   (is_from ? RowExclusiveLock : AccessShareLock));
 776
 777                 relid = RelationGetRelid(rel);
 778
 779                 rte = makeNode(RangeTblEntry);
 780                 rte->rtekind = RTE_RELATION;
 781                 rte->relid = RelationGetRelid(rel);
 782                 rte->relkind = rel->rd_rel->relkind;
 783                 rte->requiredPerms = required_access;
 784
 785                 tupDesc = RelationGetDescr(rel);
 786                 attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
 787                 foreach(cur, attnums)
 788                 {
 789                         int                     attno = lfirst_int(cur) -
 790                         FirstLowInvalidHeapAttributeNumber;
 791
 792                         if (is_from)
 793                                 rte->modifiedCols = bms_add_member(rte->modifiedCols, attno);
 794                         else
 795                                 rte->selectedCols = bms_add_member(rte->selectedCols, attno);
 796                 }
 797                 ExecCheckRTPerms(list_make1(rte), true);
 798         }
 799         else
 800         {
 801                 Assert(stmt->query);
 802
 803                 relid = InvalidOid;
 804                 rel = NULL;
 805         }
 806
 807         if (is_from)
 808         {
 809                 Assert(rel);
 810
 811                 /* check read-only transaction */
 812                 if (XactReadOnly && !rel->rd_islocaltemp)
 813                         PreventCommandIfReadOnly("COPY FROM");
 814
 815                 cstate = BeginCopyFrom(rel, stmt->filename,
 816                                                            stmt->attlist, stmt->options);
 817                 *processed = CopyFrom(cstate);  /* copy from file to database */
 818                 EndCopyFrom(cstate);
 819         }
 820         else
 821         {
 822                 cstate = BeginCopyTo(rel, stmt->query, queryString, stmt->filename,
 823                                                          stmt->attlist, stmt->options);
 824                 *processed = DoCopyTo(cstate);  /* copy from database to file */
 825                 EndCopyTo(cstate);
 826         }
 827
 828         /*
 829          * Close the relation. If reading, we can release the AccessShareLock we
 830          * got; if writing, we should hold the lock until end of transaction to
 831          * ensure that updates will be committed before lock is released.
 832          */
 833         if (rel != NULL)
 834                 heap_close(rel, (is_from ? NoLock : AccessShareLock));
 835
 836         return relid;
 837 }
 838
 839 /*
 840  * Process the statement option list for COPY.
 841  *
 842  * Scan the options list (a list of DefElem) and transpose the information
 843  * into cstate, applying appropriate error checking.
 844  *
 845  * cstate is assumed to be filled with zeroes initially.
 846  *
 847  * This is exported so that external users of the COPY API can sanity-check
 848  * a list of options.  In that usage, cstate should be passed as NULL
 849  * (since external users don't know sizeof(CopyStateData)) and the collected
 850  * data is just leaked until CurrentMemoryContext is reset.
 851  *
 852  * Note that additional checking, such as whether column names listed in FORCE
 853  * QUOTE actually exist, has to be applied later.  This just checks for
 854  * self-consistency of the options list.
 855  */
 856 void
 857 ProcessCopyOptions(CopyState cstate,
 858                                    bool is_from,
 859                                    List *options)
 860 {
 861         bool            format_specified = false;
 862         ListCell   *option;
 863
 864         /* Support external use for option sanity checking */
 865         if (cstate == NULL)
 866                 cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
 867
 868         cstate->file_encoding = -1;
 869
 870         /* Extract options from the statement node tree */
 871         foreach(option, options)
 872         {
 873                 DefElem    *defel = (DefElem *) lfirst(option);
 874
 875                 if (strcmp(defel->defname, "format") == 0)
 876                 {
 877                         char       *fmt = defGetString(defel);
 878
 879                         if (format_specified)
 880                                 ereport(ERROR,
 881                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 882                                                  errmsg("conflicting or redundant options")));
 883                         format_specified = true;
 884                         if (strcmp(fmt, "text") == 0)
 885                                  /* default format */ ;
 886                         else if (strcmp(fmt, "csv") == 0)
 887                                 cstate->csv_mode = true;
 888                         else if (strcmp(fmt, "binary") == 0)
 889                                 cstate->binary = true;
 890                         else
 891                                 ereport(ERROR,
 892                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 893                                                  errmsg("COPY format \"%s\" not recognized", fmt)));
 894                 }
 895                 else if (strcmp(defel->defname, "oids") == 0)
 896                 {
 897                         if (cstate->oids)
 898                                 ereport(ERROR,
 899                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 900                                                  errmsg("conflicting or redundant options")));
 901                         cstate->oids = defGetBoolean(defel);
 902                 }
 903                 else if (strcmp(defel->defname, "freeze") == 0)
 904                 {
 905                         if (cstate->freeze)
 906                                 ereport(ERROR,
 907                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 908                                                  errmsg("conflicting or redundant options")));
 909                         cstate->freeze = defGetBoolean(defel);
 910                 }
 911                 else if (strcmp(defel->defname, "delimiter") == 0)
 912                 {
 913                         if (cstate->delim)
 914                                 ereport(ERROR,
 915                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 916                                                  errmsg("conflicting or redundant options")));
 917                         cstate->delim = defGetString(defel);
 918                 }
 919                 else if (strcmp(defel->defname, "null") == 0)
 920                 {
 921                         if (cstate->null_print)
 922                                 ereport(ERROR,
 923                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 924                                                  errmsg("conflicting or redundant options")));
 925                         cstate->null_print = defGetString(defel);
 926                 }
 927                 else if (strcmp(defel->defname, "header") == 0)
 928                 {
 929                         if (cstate->header_line)
 930                                 ereport(ERROR,
 931                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 932                                                  errmsg("conflicting or redundant options")));
 933                         cstate->header_line = defGetBoolean(defel);
 934                 }
 935                 else if (strcmp(defel->defname, "quote") == 0)
 936                 {
 937                         if (cstate->quote)
 938                                 ereport(ERROR,
 939                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 940                                                  errmsg("conflicting or redundant options")));
 941                         cstate->quote = defGetString(defel);
 942                 }
 943                 else if (strcmp(defel->defname, "escape") == 0)
 944                 {
 945                         if (cstate->escape)
 946                                 ereport(ERROR,
 947                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 948                                                  errmsg("conflicting or redundant options")));
 949                         cstate->escape = defGetString(defel);
 950                 }
 951                 else if (strcmp(defel->defname, "force_quote") == 0)
 952                 {
 953                         if (cstate->force_quote || cstate->force_quote_all)
 954                                 ereport(ERROR,
 955                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 956                                                  errmsg("conflicting or redundant options")));
 957                         if (defel->arg && IsA(defel->arg, A_Star))
 958                                 cstate->force_quote_all = true;
 959                         else if (defel->arg && IsA(defel->arg, List))
 960                                 cstate->force_quote = (List *) defel->arg;
 961                         else
 962                                 ereport(ERROR,
 963                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 964                                                  errmsg("argument to option \"%s\" must be a list of column names",
 965                                                                 defel->defname)));
 966                 }
 967                 else if (strcmp(defel->defname, "force_not_null") == 0)
 968                 {
 969                         if (cstate->force_notnull)
 970                                 ereport(ERROR,
 971                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 972                                                  errmsg("conflicting or redundant options")));
 973                         if (defel->arg && IsA(defel->arg, List))
 974                                 cstate->force_notnull = (List *) defel->arg;
 975                         else
 976                                 ereport(ERROR,
 977                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 978                                                  errmsg("argument to option \"%s\" must be a list of column names",
 979                                                                 defel->defname)));
 980                 }
 981                 else if (strcmp(defel->defname, "convert_selectively") == 0)
 982                 {
 983                         /*
 984                          * Undocumented, not-accessible-from-SQL option: convert only
 985                          * the named columns to binary form, storing the rest as NULLs.
 986                          * It's allowed for the column list to be NIL.
 987                          */
 988                         if (cstate->convert_selectively)
 989                                 ereport(ERROR,
 990                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 991                                                  errmsg("conflicting or redundant options")));
 992                         cstate->convert_selectively = true;
 993                         if (defel->arg == NULL || IsA(defel->arg, List))
 994                                 cstate->convert_select = (List *) defel->arg;
 995                         else
 996                                 ereport(ERROR,
 997                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 998                                                  errmsg("argument to option \"%s\" must be a list of column names",
 999                                                                 defel->defname)));
1000                 }
1001                 else if (strcmp(defel->defname, "encoding") == 0)
1002                 {
1003                         if (cstate->file_encoding >= 0)
1004                                 ereport(ERROR,
1005                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1006                                                  errmsg("conflicting or redundant options")));
1007                         cstate->file_encoding = pg_char_to_encoding(defGetString(defel));
1008                         if (cstate->file_encoding < 0)
1009                                 ereport(ERROR,
1010                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1011                                                  errmsg("argument to option \"%s\" must be a valid encoding name",
1012                                                                 defel->defname)));
1013                 }
1014                 else
1015                         ereport(ERROR,
1016                                         (errcode(ERRCODE_SYNTAX_ERROR),
1017                                          errmsg("option \"%s\" not recognized",
1018                                                         defel->defname)));
1019         }
1020
1021         /*
1022          * Check for incompatible options (must do these two before inserting
1023          * defaults)
1024          */
1025         if (cstate->binary && cstate->delim)
1026                 ereport(ERROR,
1027                                 (errcode(ERRCODE_SYNTAX_ERROR),
1028                                  errmsg("cannot specify DELIMITER in BINARY mode")));
1029
1030         if (cstate->binary && cstate->null_print)
1031                 ereport(ERROR,
1032                                 (errcode(ERRCODE_SYNTAX_ERROR),
1033                                  errmsg("cannot specify NULL in BINARY mode")));
1034
1035         /* Set defaults for omitted options */
1036         if (!cstate->delim)
1037                 cstate->delim = cstate->csv_mode ? "," : "\t";
1038
1039         if (!cstate->null_print)
1040                 cstate->null_print = cstate->csv_mode ? "" : "\\N";
1041         cstate->null_print_len = strlen(cstate->null_print);
1042
1043         if (cstate->csv_mode)
1044         {
1045                 if (!cstate->quote)
1046                         cstate->quote = "\"";
1047                 if (!cstate->escape)
1048                         cstate->escape = cstate->quote;
1049         }
1050
1051         /* Only single-byte delimiter strings are supported. */
1052         if (strlen(cstate->delim) != 1)
1053                 ereport(ERROR,
1054                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1055                           errmsg("COPY delimiter must be a single one-byte character")));
1056
1057         /* Disallow end-of-line characters */
1058         if (strchr(cstate->delim, '\r') != NULL ||
1059                 strchr(cstate->delim, '\n') != NULL)
1060                 ereport(ERROR,
1061                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1062                          errmsg("COPY delimiter cannot be newline or carriage return")));
1063
1064         if (strchr(cstate->null_print, '\r') != NULL ||
1065                 strchr(cstate->null_print, '\n') != NULL)
1066                 ereport(ERROR,
1067                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1068                                  errmsg("COPY null representation cannot use newline or carriage return")));
1069
1070         /*
1071          * Disallow unsafe delimiter characters in non-CSV mode.  We can't allow
1072          * backslash because it would be ambiguous.  We can't allow the other
1073          * cases because data characters matching the delimiter must be
1074          * backslashed, and certain backslash combinations are interpreted
1075          * non-literally by COPY IN.  Disallowing all lower case ASCII letters is
1076          * more than strictly necessary, but seems best for consistency and
1077          * future-proofing.  Likewise we disallow all digits though only octal
1078          * digits are actually dangerous.
1079          */
1080         if (!cstate->csv_mode &&
1081                 strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
1082                            cstate->delim[0]) != NULL)
1083                 ereport(ERROR,
1084                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1085                                  errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
1086
1087         /* Check header */
1088         if (!cstate->csv_mode && cstate->header_line)
1089                 ereport(ERROR,
1090                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1091                                  errmsg("COPY HEADER available only in CSV mode")));
1092
1093         /* Check quote */
1094         if (!cstate->csv_mode && cstate->quote != NULL)
1095                 ereport(ERROR,
1096                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1097                                  errmsg("COPY quote available only in CSV mode")));
1098
1099         if (cstate->csv_mode && strlen(cstate->quote) != 1)
1100                 ereport(ERROR,
1101                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1102                                  errmsg("COPY quote must be a single one-byte character")));
1103
1104         if (cstate->csv_mode && cstate->delim[0] == cstate->quote[0])
1105                 ereport(ERROR,
1106                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1107                                  errmsg("COPY delimiter and quote must be different")));
1108
1109         /* Check escape */
1110         if (!cstate->csv_mode && cstate->escape != NULL)
1111                 ereport(ERROR,
1112                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1113                                  errmsg("COPY escape available only in CSV mode")));
1114
1115         if (cstate->csv_mode && strlen(cstate->escape) != 1)
1116                 ereport(ERROR,
1117                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1118                                  errmsg("COPY escape must be a single one-byte character")));
1119
1120         /* Check force_quote */
1121         if (!cstate->csv_mode && (cstate->force_quote || cstate->force_quote_all))
1122                 ereport(ERROR,
1123                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1124                                  errmsg("COPY force quote available only in CSV mode")));
1125         if ((cstate->force_quote || cstate->force_quote_all) && is_from)
1126                 ereport(ERROR,
1127                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1128                                  errmsg("COPY force quote only available using COPY TO")));
1129
1130         /* Check force_notnull */
1131         if (!cstate->csv_mode && cstate->force_notnull != NIL)
1132                 ereport(ERROR,
1133                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1134                                  errmsg("COPY force not null available only in CSV mode")));
1135         if (cstate->force_notnull != NIL && !is_from)
1136                 ereport(ERROR,
1137                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1138                           errmsg("COPY force not null only available using COPY FROM")));
1139
1140         /* Don't allow the delimiter to appear in the null string. */
1141         if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
1142                 ereport(ERROR,
1143                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1144                 errmsg("COPY delimiter must not appear in the NULL specification")));
1145
1146         /* Don't allow the CSV quote char to appear in the null string. */
1147         if (cstate->csv_mode &&
1148                 strchr(cstate->null_print, cstate->quote[0]) != NULL)
1149                 ereport(ERROR,
1150                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1151                                  errmsg("CSV quote character must not appear in the NULL specification")));
1152 }
1153
1154 /*
1155  * Common setup routines used by BeginCopyFrom and BeginCopyTo.
1156  *
1157  * Iff <binary>, unload or reload in the binary format, as opposed to the
1158  * more wasteful but more robust and portable text format.
1159  *
1160  * Iff <oids>, unload or reload the format that includes OID information.
1161  * On input, we accept OIDs whether or not the table has an OID column,
1162  * but silently drop them if it does not.  On output, we report an error
1163  * if the user asks for OIDs in a table that has none (not providing an
1164  * OID column might seem friendlier, but could seriously confuse programs).
1165  *
1166  * If in the text format, delimit columns with delimiter <delim> and print
1167  * NULL values as <null_print>.
1168  */
1169 static CopyState
1170 BeginCopy(bool is_from,
1171                   Relation rel,
1172                   Node *raw_query,
1173                   const char *queryString,
1174                   List *attnamelist,
1175                   List *options)
1176 {
1177         CopyState       cstate;
1178         TupleDesc       tupDesc;
1179         int                     num_phys_attrs;
1180         MemoryContext oldcontext;
1181
1182         /* Allocate workspace and zero all fields */
1183         cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
1184
1185         /*
1186          * We allocate everything used by a cstate in a new memory context. This
1187          * avoids memory leaks during repeated use of COPY in a query.
1188          */
1189         cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
1190                                                                                                 "COPY",
1191                                                                                                 ALLOCSET_DEFAULT_MINSIZE,
1192                                                                                                 ALLOCSET_DEFAULT_INITSIZE,
1193                                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
1194
1195         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1196
1197         /* Extract options from the statement node tree */
1198         ProcessCopyOptions(cstate, is_from, options);
1199
1200         /* Process the source/target relation or query */
1201         if (rel)
1202         {
1203                 Assert(!raw_query);
1204
1205                 cstate->rel = rel;
1206
1207                 tupDesc = RelationGetDescr(cstate->rel);
1208
1209                 /* Don't allow COPY w/ OIDs to or from a table without them */
1210                 if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
1211                         ereport(ERROR,
1212                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
1213                                          errmsg("table \"%s\" does not have OIDs",
1214                                                         RelationGetRelationName(cstate->rel))));
1215         }
1216         else
1217         {
1218                 List       *rewritten;
1219                 Query      *query;
1220                 PlannedStmt *plan;
1221                 DestReceiver *dest;
1222
1223                 Assert(!is_from);
1224                 cstate->rel = NULL;
1225
1226                 /* Don't allow COPY w/ OIDs from a select */
1227                 if (cstate->oids)
1228                         ereport(ERROR,
1229                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1230                                          errmsg("COPY (SELECT) WITH OIDS is not supported")));
1231
1232                 /*
1233                  * Run parse analysis and rewrite.      Note this also acquires sufficient
1234                  * locks on the source table(s).
1235                  *
1236                  * Because the parser and planner tend to scribble on their input, we
1237                  * make a preliminary copy of the source querytree.  This prevents
1238                  * problems in the case that the COPY is in a portal or plpgsql
1239                  * function and is executed repeatedly.  (See also the same hack in
1240                  * DECLARE CURSOR and PREPARE.)  XXX FIXME someday.
1241                  */
1242                 rewritten = pg_analyze_and_rewrite((Node *) copyObject(raw_query),
1243                                                                                    queryString, NULL, 0);
1244
1245                 /* We don't expect more or less than one result query */
1246                 if (list_length(rewritten) != 1)
1247                         elog(ERROR, "unexpected rewrite result");
1248
1249                 query = (Query *) linitial(rewritten);
1250
1251                 /* The grammar allows SELECT INTO, but we don't support that */
1252                 if (query->utilityStmt != NULL &&
1253                         IsA(query->utilityStmt, CreateTableAsStmt))
1254                         ereport(ERROR,
1255                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1256                                          errmsg("COPY (SELECT INTO) is not supported")));
1257
1258                 Assert(query->commandType == CMD_SELECT);
1259                 Assert(query->utilityStmt == NULL);
1260
1261                 /* plan the query */
1262                 plan = planner(query, 0, NULL);
1263
1264                 /*
1265                  * Use a snapshot with an updated command ID to ensure this query sees
1266                  * results of any previously executed queries.
1267                  */
1268                 PushCopiedSnapshot(GetActiveSnapshot());
1269                 UpdateActiveSnapshotCommandId();
1270
1271                 /* Create dest receiver for COPY OUT */
1272                 dest = CreateDestReceiver(DestCopyOut);
1273                 ((DR_copy *) dest)->cstate = cstate;
1274
1275                 /* Create a QueryDesc requesting no output */
1276                 cstate->queryDesc = CreateQueryDesc(plan, queryString,
1277                                                                                         GetActiveSnapshot(),
1278                                                                                         InvalidSnapshot,
1279                                                                                         dest, NULL, 0);
1280
1281                 /*
1282                  * Call ExecutorStart to prepare the plan for execution.
1283                  *
1284                  * ExecutorStart computes a result tupdesc for us
1285                  */
1286                 ExecutorStart(cstate->queryDesc, 0);
1287
1288                 tupDesc = cstate->queryDesc->tupDesc;
1289         }
1290
1291         /* Generate or convert list of attributes to process */
1292         cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1293
1294         num_phys_attrs = tupDesc->natts;
1295
1296         /* Convert FORCE QUOTE name list to per-column flags, check validity */
1297         cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1298         if (cstate->force_quote_all)
1299         {
1300                 int                     i;
1301
1302                 for (i = 0; i < num_phys_attrs; i++)
1303                         cstate->force_quote_flags[i] = true;
1304         }
1305         else if (cstate->force_quote)
1306         {
1307                 List       *attnums;
1308                 ListCell   *cur;
1309
1310                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_quote);
1311
1312                 foreach(cur, attnums)
1313                 {
1314                         int                     attnum = lfirst_int(cur);
1315
1316                         if (!list_member_int(cstate->attnumlist, attnum))
1317                                 ereport(ERROR,
1318                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1319                                    errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
1320                                                   NameStr(tupDesc->attrs[attnum - 1]->attname))));
1321                         cstate->force_quote_flags[attnum - 1] = true;
1322                 }
1323         }
1324
1325         /* Convert FORCE NOT NULL name list to per-column flags, check validity */
1326         cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1327         if (cstate->force_notnull)
1328         {
1329                 List       *attnums;
1330                 ListCell   *cur;
1331
1332                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_notnull);
1333
1334                 foreach(cur, attnums)
1335                 {
1336                         int                     attnum = lfirst_int(cur);
1337
1338                         if (!list_member_int(cstate->attnumlist, attnum))
1339                                 ereport(ERROR,
1340                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1341                                 errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
1342                                            NameStr(tupDesc->attrs[attnum - 1]->attname))));
1343                         cstate->force_notnull_flags[attnum - 1] = true;
1344                 }
1345         }
1346
1347         /* Convert convert_selectively name list to per-column flags */
1348         if (cstate->convert_selectively)
1349         {
1350                 List       *attnums;
1351                 ListCell   *cur;
1352
1353                 cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1354
1355                 attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->convert_select);
1356
1357                 foreach(cur, attnums)
1358                 {
1359                         int                     attnum = lfirst_int(cur);
1360
1361                         if (!list_member_int(cstate->attnumlist, attnum))
1362                                 ereport(ERROR,
1363                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1364                                                  errmsg_internal("selected column \"%s\" not referenced by COPY",
1365                                                                                  NameStr(tupDesc->attrs[attnum - 1]->attname))));
1366                         cstate->convert_select_flags[attnum - 1] = true;
1367                 }
1368         }
1369
1370         /* Use client encoding when ENCODING option is not specified. */
1371         if (cstate->file_encoding < 0)
1372                 cstate->file_encoding = pg_get_client_encoding();
1373
1374         /*
1375          * Set up encoding conversion info.  Even if the file and server encodings
1376          * are the same, we must apply pg_any_to_server() to validate data in
1377          * multibyte encodings.
1378          */
1379         cstate->need_transcoding =
1380                 (cstate->file_encoding != GetDatabaseEncoding() ||
1381                  pg_database_encoding_max_length() > 1);
1382         /* See Multibyte encoding comment above */
1383         cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
1384
1385         cstate->copy_dest = COPY_FILE;          /* default */
1386
1387         MemoryContextSwitchTo(oldcontext);
1388
1389         return cstate;
1390 }
1391
1392 /*
1393  * Release resources allocated in a cstate for COPY TO/FROM.
1394  */
1395 static void
1396 EndCopy(CopyState cstate)
1397 {
1398         if (cstate->filename != NULL && FreeFile(cstate->copy_file))
1399                 ereport(ERROR,
1400                                 (errcode_for_file_access(),
1401                                  errmsg("could not close file \"%s\": %m",
1402                                                 cstate->filename)));
1403
1404         MemoryContextDelete(cstate->copycontext);
1405         pfree(cstate);
1406 }
1407
1408 /*
1409  * Setup CopyState to read tuples from a table or a query for COPY TO.
1410  */
1411 static CopyState
1412 BeginCopyTo(Relation rel,
1413                         Node *query,
1414                         const char *queryString,
1415                         const char *filename,
1416                         List *attnamelist,
1417                         List *options)
1418 {
1419         CopyState       cstate;
1420         bool            pipe = (filename == NULL);
1421         MemoryContext oldcontext;
1422
1423         if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
1424         {
1425                 if (rel->rd_rel->relkind == RELKIND_VIEW)
1426                         ereport(ERROR,
1427                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1428                                          errmsg("cannot copy from view \"%s\"",
1429                                                         RelationGetRelationName(rel)),
1430                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1431                 else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1432                         ereport(ERROR,
1433                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1434                                          errmsg("cannot copy from foreign table \"%s\"",
1435                                                         RelationGetRelationName(rel)),
1436                                          errhint("Try the COPY (SELECT ...) TO variant.")));
1437                 else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
1438                         ereport(ERROR,
1439                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1440                                          errmsg("cannot copy from sequence \"%s\"",
1441                                                         RelationGetRelationName(rel))));
1442                 else
1443                         ereport(ERROR,
1444                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1445                                          errmsg("cannot copy from non-table relation \"%s\"",
1446                                                         RelationGetRelationName(rel))));
1447         }
1448
1449         cstate = BeginCopy(false, rel, query, queryString, attnamelist, options);
1450         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1451
1452         if (pipe)
1453         {
1454                 if (whereToSendOutput != DestRemote)
1455                         cstate->copy_file = stdout;
1456         }
1457         else
1458         {
1459                 mode_t          oumask;         /* Pre-existing umask value */
1460                 struct stat st;
1461
1462                 /*
1463                  * Prevent write to relative path ... too easy to shoot oneself in the
1464                  * foot by overwriting a database file ...
1465                  */
1466                 if (!is_absolute_path(filename))
1467                         ereport(ERROR,
1468                                         (errcode(ERRCODE_INVALID_NAME),
1469                                          errmsg("relative path not allowed for COPY to file")));
1470
1471                 cstate->filename = pstrdup(filename);
1472                 oumask = umask(S_IWGRP | S_IWOTH);
1473                 cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1474                 umask(oumask);
1475
1476                 if (cstate->copy_file == NULL)
1477                         ereport(ERROR,
1478                                         (errcode_for_file_access(),
1479                                          errmsg("could not open file \"%s\" for writing: %m",
1480                                                         cstate->filename)));
1481
1482                 fstat(fileno(cstate->copy_file), &st);
1483                 if (S_ISDIR(st.st_mode))
1484                         ereport(ERROR,
1485                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1486                                          errmsg("\"%s\" is a directory", cstate->filename)));
1487         }
1488
1489         MemoryContextSwitchTo(oldcontext);
1490
1491         return cstate;
1492 }
1493
1494 /*
1495  * This intermediate routine exists mainly to localize the effects of setjmp
1496  * so we don't need to plaster a lot of variables with "volatile".
1497  */
1498 static uint64
1499 DoCopyTo(CopyState cstate)
1500 {
1501         bool            pipe = (cstate->filename == NULL);
1502         bool            fe_copy = (pipe && whereToSendOutput == DestRemote);
1503         uint64          processed;
1504
1505         PG_TRY();
1506         {
1507                 if (fe_copy)
1508                         SendCopyBegin(cstate);
1509
1510                 processed = CopyTo(cstate);
1511
1512                 if (fe_copy)
1513                         SendCopyEnd(cstate);
1514         }
1515         PG_CATCH();
1516         {
1517                 /*
1518                  * Make sure we turn off old-style COPY OUT mode upon error. It is
1519                  * okay to do this in all cases, since it does nothing if the mode is
1520                  * not on.
1521                  */
1522                 pq_endcopyout(true);
1523                 PG_RE_THROW();
1524         }
1525         PG_END_TRY();
1526
1527         return processed;
1528 }
1529
1530 /*
1531  * Clean up storage and release resources for COPY TO.
1532  */
1533 static void
1534 EndCopyTo(CopyState cstate)
1535 {
1536         if (cstate->queryDesc != NULL)
1537         {
1538                 /* Close down the query and free resources. */
1539                 ExecutorFinish(cstate->queryDesc);
1540                 ExecutorEnd(cstate->queryDesc);
1541                 FreeQueryDesc(cstate->queryDesc);
1542                 PopActiveSnapshot();
1543         }
1544
1545         /* Clean up storage */
1546         EndCopy(cstate);
1547 }
1548
1549 /*
1550  * Copy from relation or query TO file.
1551  */
1552 static uint64
1553 CopyTo(CopyState cstate)
1554 {
1555         TupleDesc       tupDesc;
1556         int                     num_phys_attrs;
1557         Form_pg_attribute *attr;
1558         ListCell   *cur;
1559         uint64          processed;
1560
1561         if (cstate->rel)
1562                 tupDesc = RelationGetDescr(cstate->rel);
1563         else
1564                 tupDesc = cstate->queryDesc->tupDesc;
1565         attr = tupDesc->attrs;
1566         num_phys_attrs = tupDesc->natts;
1567         cstate->null_print_client = cstate->null_print;         /* default */
1568
1569         /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1570         cstate->fe_msgbuf = makeStringInfo();
1571
1572         /* Get info about the columns we need to process. */
1573         cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1574         foreach(cur, cstate->attnumlist)
1575         {
1576                 int                     attnum = lfirst_int(cur);
1577                 Oid                     out_func_oid;
1578                 bool            isvarlena;
1579
1580                 if (cstate->binary)
1581                         getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
1582                                                                         &out_func_oid,
1583                                                                         &isvarlena);
1584                 else
1585                         getTypeOutputInfo(attr[attnum - 1]->atttypid,
1586                                                           &out_func_oid,
1587                                                           &isvarlena);
1588                 fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
1589         }
1590
1591         /*
1592          * Create a temporary memory context that we can reset once per row to
1593          * recover palloc'd memory.  This avoids any problems with leaks inside
1594          * datatype output routines, and should be faster than retail pfree's
1595          * anyway.      (We don't need a whole econtext as CopyFrom does.)
1596          */
1597         cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1598                                                                                            "COPY TO",
1599                                                                                            ALLOCSET_DEFAULT_MINSIZE,
1600                                                                                            ALLOCSET_DEFAULT_INITSIZE,
1601                                                                                            ALLOCSET_DEFAULT_MAXSIZE);
1602
1603         if (cstate->binary)
1604         {
1605                 /* Generate header for a binary copy */
1606                 int32           tmp;
1607
1608                 /* Signature */
1609                 CopySendData(cstate, BinarySignature, 11);
1610                 /* Flags field */
1611                 tmp = 0;
1612                 if (cstate->oids)
1613                         tmp |= (1 << 16);
1614                 CopySendInt32(cstate, tmp);
1615                 /* No header extension */
1616                 tmp = 0;
1617                 CopySendInt32(cstate, tmp);
1618         }
1619         else
1620         {
1621                 /*
1622                  * For non-binary copy, we need to convert null_print to file
1623                  * encoding, because it will be sent directly with CopySendString.
1624                  */
1625                 if (cstate->need_transcoding)
1626                         cstate->null_print_client = pg_server_to_any(cstate->null_print,
1627                                                                                                           cstate->null_print_len,
1628                                                                                                           cstate->file_encoding);
1629
1630                 /* if a header has been requested send the line */
1631                 if (cstate->header_line)
1632                 {
1633                         bool            hdr_delim = false;
1634
1635                         foreach(cur, cstate->attnumlist)
1636                         {
1637                                 int                     attnum = lfirst_int(cur);
1638                                 char       *colname;
1639
1640                                 if (hdr_delim)
1641                                         CopySendChar(cstate, cstate->delim[0]);
1642                                 hdr_delim = true;
1643
1644                                 colname = NameStr(attr[attnum - 1]->attname);
1645
1646                                 CopyAttributeOutCSV(cstate, colname, false,
1647                                                                         list_length(cstate->attnumlist) == 1);
1648                         }
1649
1650                         CopySendEndOfRow(cstate);
1651                 }
1652         }
1653
1654         if (cstate->rel)
1655         {
1656                 Datum      *values;
1657                 bool       *nulls;
1658                 HeapScanDesc scandesc;
1659                 HeapTuple       tuple;
1660
1661                 values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1662                 nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
1663
1664                 scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1665
1666                 processed = 0;
1667                 while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
1668                 {
1669                         CHECK_FOR_INTERRUPTS();
1670
1671                         /* Deconstruct the tuple ... faster than repeated heap_getattr */
1672                         heap_deform_tuple(tuple, tupDesc, values, nulls);
1673
1674                         /* Format and send the data */
1675                         CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
1676                         processed++;
1677                 }
1678
1679                 heap_endscan(scandesc);
1680
1681                 pfree(values);
1682                 pfree(nulls);
1683         }
1684         else
1685         {
1686                 /* run the plan --- the dest receiver will send tuples */
1687                 ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
1688                 processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1689         }
1690
1691         if (cstate->binary)
1692         {
1693                 /* Generate trailer for a binary copy */
1694                 CopySendInt16(cstate, -1);
1695                 /* Need to flush out the trailer */
1696                 CopySendEndOfRow(cstate);
1697         }
1698
1699         MemoryContextDelete(cstate->rowcontext);
1700
1701         return processed;
1702 }
1703
1704 /*
1705  * Emit one row during CopyTo().
1706  */
1707 static void
1708 CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
1709 {
1710         bool            need_delim = false;
1711         FmgrInfo   *out_functions = cstate->out_functions;
1712         MemoryContext oldcontext;
1713         ListCell   *cur;
1714         char       *string;
1715
1716         MemoryContextReset(cstate->rowcontext);
1717         oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1718
1719         if (cstate->binary)
1720         {
1721                 /* Binary per-tuple header */
1722                 CopySendInt16(cstate, list_length(cstate->attnumlist));
1723                 /* Send OID if wanted --- note attnumlist doesn't include it */
1724                 if (cstate->oids)
1725                 {
1726                         /* Hack --- assume Oid is same size as int32 */
1727                         CopySendInt32(cstate, sizeof(int32));
1728                         CopySendInt32(cstate, tupleOid);
1729                 }
1730         }
1731         else
1732         {
1733                 /* Text format has no per-tuple header, but send OID if wanted */
1734                 /* Assume digits don't need any quoting or encoding conversion */
1735                 if (cstate->oids)
1736                 {
1737                         string = DatumGetCString(DirectFunctionCall1(oidout,
1738                                                                                                 ObjectIdGetDatum(tupleOid)));
1739                         CopySendString(cstate, string);
1740                         need_delim = true;
1741                 }
1742         }
1743
1744         foreach(cur, cstate->attnumlist)
1745         {
1746                 int                     attnum = lfirst_int(cur);
1747                 Datum           value = values[attnum - 1];
1748                 bool            isnull = nulls[attnum - 1];
1749
1750                 if (!cstate->binary)
1751                 {
1752                         if (need_delim)
1753                                 CopySendChar(cstate, cstate->delim[0]);
1754                         need_delim = true;
1755                 }
1756
1757                 if (isnull)
1758                 {
1759                         if (!cstate->binary)
1760                                 CopySendString(cstate, cstate->null_print_client);
1761                         else
1762                                 CopySendInt32(cstate, -1);
1763                 }
1764                 else
1765                 {
1766                         if (!cstate->binary)
1767                         {
1768                                 string = OutputFunctionCall(&out_functions[attnum - 1],
1769                                                                                         value);
1770                                 if (cstate->csv_mode)
1771                                         CopyAttributeOutCSV(cstate, string,
1772                                                                                 cstate->force_quote_flags[attnum - 1],
1773                                                                                 list_length(cstate->attnumlist) == 1);
1774                                 else
1775                                         CopyAttributeOutText(cstate, string);
1776                         }
1777                         else
1778                         {
1779                                 bytea      *outputbytes;
1780
1781                                 outputbytes = SendFunctionCall(&out_functions[attnum - 1],
1782                                                                                            value);
1783                                 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
1784                                 CopySendData(cstate, VARDATA(outputbytes),
1785                                                          VARSIZE(outputbytes) - VARHDRSZ);
1786                         }
1787                 }
1788         }
1789
1790         CopySendEndOfRow(cstate);
1791
1792         MemoryContextSwitchTo(oldcontext);
1793 }
1794
1795
1796 /*
1797  * error context callback for COPY FROM
1798  *
1799  * The argument for the error context must be CopyState.
1800  */
1801 void
1802 CopyFromErrorCallback(void *arg)
1803 {
1804         CopyState       cstate = (CopyState) arg;
1805
1806         if (cstate->binary)
1807         {
1808                 /* can't usefully display the data */
1809                 if (cstate->cur_attname)
1810                         errcontext("COPY %s, line %d, column %s",
1811                                            cstate->cur_relname, cstate->cur_lineno,
1812                                            cstate->cur_attname);
1813                 else
1814                         errcontext("COPY %s, line %d",
1815                                            cstate->cur_relname, cstate->cur_lineno);
1816         }
1817         else
1818         {
1819                 if (cstate->cur_attname && cstate->cur_attval)
1820                 {
1821                         /* error is relevant to a particular column */
1822                         char       *attval;
1823
1824                         attval = limit_printout_length(cstate->cur_attval);
1825                         errcontext("COPY %s, line %d, column %s: \"%s\"",
1826                                            cstate->cur_relname, cstate->cur_lineno,
1827                                            cstate->cur_attname, attval);
1828                         pfree(attval);
1829                 }
1830                 else if (cstate->cur_attname)
1831                 {
1832                         /* error is relevant to a particular column, value is NULL */
1833                         errcontext("COPY %s, line %d, column %s: null input",
1834                                            cstate->cur_relname, cstate->cur_lineno,
1835                                            cstate->cur_attname);
1836                 }
1837                 else
1838                 {
1839                         /* error is relevant to a particular line */
1840                         if (cstate->line_buf_converted || !cstate->need_transcoding)
1841                         {
1842                                 char       *lineval;
1843
1844                                 lineval = limit_printout_length(cstate->line_buf.data);
1845                                 errcontext("COPY %s, line %d: \"%s\"",
1846                                                    cstate->cur_relname, cstate->cur_lineno, lineval);
1847                                 pfree(lineval);
1848                         }
1849                         else
1850                         {
1851                                 /*
1852                                  * Here, the line buffer is still in a foreign encoding, and
1853                                  * indeed it's quite likely that the error is precisely a
1854                                  * failure to do encoding conversion (ie, bad data).  We dare
1855                                  * not try to convert it, and at present there's no way to
1856                                  * regurgitate it without conversion.  So we have to punt and
1857                                  * just report the line number.
1858                                  */
1859                                 errcontext("COPY %s, line %d",
1860                                                    cstate->cur_relname, cstate->cur_lineno);
1861                         }
1862                 }
1863         }
1864 }
1865
/*
 * limit_printout_length
 *		Keep the amount of COPY data quoted in a message reasonable.
 *
 * Using the sprintf "precision" limit to truncate would look simpler, but
 * some versions of glibc have a bug/misfeature whereby vsnprintf always
 * fails (returns -1) when asked to truncate a string containing byte
 * sequences that are invalid in the current encoding.  So the truncation is
 * done here by hand.
 *
 * The result is always a freshly allocated string: a plain copy when the
 * input is at most MAX_COPY_DATA_DISPLAY bytes long, otherwise a prefix
 * clipped with pg_mbcliplen() and followed by "...".
 */
static char *
limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

	static const char ellipsis[] = "...";
	int			total = strlen(str);
	int			keep;
	char	   *clipped;

	/* Short enough already: just hand back a copy */
	if (total <= MAX_COPY_DATA_DISPLAY)
		return pstrdup(str);

	/* Let the encoding-aware routine decide how many bytes to keep */
	keep = pg_mbcliplen(str, total, MAX_COPY_DATA_DISPLAY);

	/* Kept prefix plus the ellipsis, whose sizeof includes the NUL */
	clipped = (char *) palloc(keep + sizeof(ellipsis));
	memcpy(clipped, str, keep);
	memcpy(clipped + keep, ellipsis, sizeof(ellipsis));

	return clipped;
}
1900
1901 /*
1902  * Copy FROM file to relation.
1903  */
1904 static uint64
1905 CopyFrom(CopyState cstate)
1906 {
1907         HeapTuple       tuple;
1908         TupleDesc       tupDesc;
1909         Datum      *values;
1910         bool       *nulls;
1911         ResultRelInfo *resultRelInfo;
1912         EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
1913         ExprContext *econtext;
1914         TupleTableSlot *myslot;
1915         MemoryContext oldcontext = CurrentMemoryContext;
1916
1917         ErrorContextCallback errcallback;
1918         CommandId       mycid = GetCurrentCommandId(true);
1919         int                     hi_options = 0; /* start with default heap_insert options */
1920         BulkInsertState bistate;
1921         uint64          processed = 0;
1922         bool            useHeapMultiInsert;
1923         int                     nBufferedTuples = 0;
1924
1925 #define MAX_BUFFERED_TUPLES 1000
1926         HeapTuple  *bufferedTuples = NULL;      /* initialize to silence warning */
1927         Size            bufferedTuplesSize = 0;
1928
1929         Assert(cstate->rel);
1930
1931         if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
1932         {
1933                 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
1934                         ereport(ERROR,
1935                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1936                                          errmsg("cannot copy to view \"%s\"",
1937                                                         RelationGetRelationName(cstate->rel))));
1938                 else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1939                         ereport(ERROR,
1940                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1941                                          errmsg("cannot copy to foreign table \"%s\"",
1942                                                         RelationGetRelationName(cstate->rel))));
1943                 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
1944                         ereport(ERROR,
1945                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1946                                          errmsg("cannot copy to sequence \"%s\"",
1947                                                         RelationGetRelationName(cstate->rel))));
1948                 else
1949                         ereport(ERROR,
1950                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1951                                          errmsg("cannot copy to non-table relation \"%s\"",
1952                                                         RelationGetRelationName(cstate->rel))));
1953         }
1954
1955         tupDesc = RelationGetDescr(cstate->rel);
1956
1957         /*----------
1958          * Check to see if we can avoid writing WAL
1959          *
1960          * If archive logging/streaming is not enabled *and* either
1961          *      - table was created in same transaction as this COPY
1962          *      - data is being written to relfilenode created in this transaction
1963          * then we can skip writing WAL.  It's safe because if the transaction
1964          * doesn't commit, we'll discard the table (or the new relfilenode file).
1965          * If it does commit, we'll have done the heap_sync at the bottom of this
1966          * routine first.
1967          *
1968          * As mentioned in comments in utils/rel.h, the in-same-transaction test
1969          * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
1970          * can be cleared before the end of the transaction. The exact case is
1971          * when a relation sets a new relfilenode twice in same transaction, yet
1972          * the second one fails in an aborted subtransaction, e.g.
1973          *
1974          * BEGIN;
1975          * TRUNCATE t;
1976          * SAVEPOINT save;
1977          * TRUNCATE t;
1978          * ROLLBACK TO save;
1979          * COPY ...
1980          *
1981          * However this is OK since at worst we will fail to make the optimization.
1982          *
1983          * Also, if the target file is new-in-transaction, we assume that checking
1984          * FSM for free space is a waste of time, even if we must use WAL because
1985          * of archiving.  This could possibly be wrong, but it's unlikely.
1986          *
1987          * The comments for heap_insert and RelationGetBufferForTuple specify that
1988          * skipping WAL logging is only safe if we ensure that our tuples do not
1989          * go into pages containing tuples from any other transactions --- but this
1990          * must be the case if we have a new table or new relfilenode, so we need
1991          * no additional work to enforce that.
1992          *----------
1993          */
1994         if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
1995                 cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
1996         {
1997                 hi_options |= HEAP_INSERT_SKIP_FSM;
1998                 if (!XLogIsNeeded())
1999                         hi_options |= HEAP_INSERT_SKIP_WAL;
2000
2001                 /*
2002                  * Optimize if new relfilenode was created in this subxact or
2003                  * one of its committed children and we won't see those rows later
2004                  * as part of an earlier scan or command. This ensures that if this
2005                  * subtransaction aborts then the frozen rows won't be visible
2006                  * after xact cleanup. Note that the stronger test of exactly
2007                  * which subtransaction created it is crucial for correctness
2008                  * of this optimisation.
2009                  *
2010                  * As noted above rd_newRelfilenodeSubid is not set in all cases
2011                  * where we can apply the optimization, so in those rare cases
2012                  * where we cannot honour the request we do so silently.
2013                  */
2014                 if (cstate->freeze &&
2015                         ThereAreNoPriorRegisteredSnapshots() &&
2016                         ThereAreNoReadyPortals() &&
2017                         (cstate->rel->rd_newRelfilenodeSubid == GetCurrentSubTransactionId() ||
2018                          cstate->rel->rd_createSubid == GetCurrentSubTransactionId()))
2019                         hi_options |= HEAP_INSERT_FROZEN;
2020         }
2021
2022         /*
2023          * We need a ResultRelInfo so we can use the regular executor's
2024          * index-entry-making machinery.  (There used to be a huge amount of code
2025          * here that basically duplicated execUtils.c ...)
2026          */
2027         resultRelInfo = makeNode(ResultRelInfo);
2028         resultRelInfo->ri_RangeTableIndex = 1;          /* dummy */
2029         resultRelInfo->ri_RelationDesc = cstate->rel;
2030         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(cstate->rel->trigdesc);
2031         if (resultRelInfo->ri_TrigDesc)
2032         {
2033                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
2034                         palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
2035                 resultRelInfo->ri_TrigWhenExprs = (List **)
2036                         palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(List *));
2037         }
2038         resultRelInfo->ri_TrigInstrument = NULL;
2039
2040         ExecOpenIndices(resultRelInfo);
2041
2042         estate->es_result_relations = resultRelInfo;
2043         estate->es_num_result_relations = 1;
2044         estate->es_result_relation_info = resultRelInfo;
2045
2046         /* Set up a tuple slot too */
2047         myslot = ExecInitExtraTupleSlot(estate);
2048         ExecSetSlotDescriptor(myslot, tupDesc);
2049         /* Triggers might need a slot as well */
2050         estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
2051
2052         /*
2053          * It's more efficient to prepare a bunch of tuples for insertion, and
2054          * insert them in one heap_multi_insert() call, than call heap_insert()
2055          * separately for every tuple. However, we can't do that if there are
2056          * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
2057          * expressions. Such triggers or expressions might query the table we're
2058          * inserting to, and act differently if the tuples that have already been
2059          * processed and prepared for insertion are not there.
2060          */
2061         if ((resultRelInfo->ri_TrigDesc != NULL &&
2062                  (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2063                   resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
2064                 cstate->volatile_defexprs)
2065         {
2066                 useHeapMultiInsert = false;
2067         }
2068         else
2069         {
2070                 useHeapMultiInsert = true;
2071                 bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
2072         }
2073
2074         /* Prepare to catch AFTER triggers. */
2075         AfterTriggerBeginQuery();
2076
2077         /*
2078          * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
2079          * should do this for COPY, since it's not really an "INSERT" statement as
2080          * such. However, executing these triggers maintains consistency with the
2081          * EACH ROW triggers that we already fire on COPY.
2082          */
2083         ExecBSInsertTriggers(estate, resultRelInfo);
2084
2085         values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
2086         nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
2087
2088         bistate = GetBulkInsertState();
2089         econtext = GetPerTupleExprContext(estate);
2090
2091         /* Set up callback to identify error line number */
2092         errcallback.callback = CopyFromErrorCallback;
2093         errcallback.arg = (void *) cstate;
2094         errcallback.previous = error_context_stack;
2095         error_context_stack = &errcallback;
2096
2097         for (;;)
2098         {
2099                 TupleTableSlot *slot;
2100                 bool            skip_tuple;
2101                 Oid                     loaded_oid = InvalidOid;
2102
2103                 CHECK_FOR_INTERRUPTS();
2104
2105                 if (nBufferedTuples == 0)
2106                 {
2107                         /*
2108                          * Reset the per-tuple exprcontext. We can only do this if the
2109                          * tuple buffer is empty (calling the context the per-tuple memory
2110                          * context is a bit of a misnomer now.)
2111                          */
2112                         ResetPerTupleExprContext(estate);
2113                 }
2114
2115                 /* Switch into its memory context */
2116                 MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2117
2118                 if (!NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid))
2119                         break;
2120
2121                 /* And now we can form the input tuple. */
2122                 tuple = heap_form_tuple(tupDesc, values, nulls);
2123
2124                 if (loaded_oid != InvalidOid)
2125                         HeapTupleSetOid(tuple, loaded_oid);
2126
2127                 /* Triggers and stuff need to be invoked in query context. */
2128                 MemoryContextSwitchTo(oldcontext);
2129
2130                 /* Place tuple in tuple slot --- but slot shouldn't free it */
2131                 slot = myslot;
2132                 ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2133
2134                 skip_tuple = false;
2135
2136                 /* BEFORE ROW INSERT Triggers */
2137                 if (resultRelInfo->ri_TrigDesc &&
2138                         resultRelInfo->ri_TrigDesc->trig_insert_before_row)
2139                 {
2140                         slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
2141
2142                         if (slot == NULL)       /* "do nothing" */
2143                                 skip_tuple = true;
2144                         else    /* trigger might have changed tuple */
2145                                 tuple = ExecMaterializeSlot(slot);
2146                 }
2147
2148                 if (!skip_tuple)
2149                 {
2150                         /* Check the constraints of the tuple */
2151                         if (cstate->rel->rd_att->constr)
2152                                 ExecConstraints(resultRelInfo, slot, estate);
2153
2154                         if (useHeapMultiInsert)
2155                         {
2156                                 /* Add this tuple to the tuple buffer */
2157                                 bufferedTuples[nBufferedTuples++] = tuple;
2158                                 bufferedTuplesSize += tuple->t_len;
2159
2160                                 /*
2161                                  * If the buffer filled up, flush it. Also flush if the total
2162                                  * size of all the tuples in the buffer becomes large, to
2163                                  * avoid using large amounts of memory for the buffers when
2164                                  * the tuples are exceptionally wide.
2165                                  */
2166                                 if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
2167                                         bufferedTuplesSize > 65535)
2168                                 {
2169                                         CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2170                                                                                 resultRelInfo, myslot, bistate,
2171                                                                                 nBufferedTuples, bufferedTuples);
2172                                         nBufferedTuples = 0;
2173                                         bufferedTuplesSize = 0;
2174                                 }
2175                         }
2176                         else
2177                         {
2178                                 List       *recheckIndexes = NIL;
2179
2180                                 /* OK, store the tuple and create index entries for it */
2181                                 heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
2182
2183                                 if (resultRelInfo->ri_NumIndices > 0)
2184                                         recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
2185                                                                                                                    estate);
2186
2187                                 /* AFTER ROW INSERT Triggers */
2188                                 ExecARInsertTriggers(estate, resultRelInfo, tuple,
2189                                                                          recheckIndexes);
2190
2191                                 list_free(recheckIndexes);
2192                         }
2193
2194                         /*
2195                          * We count only tuples not suppressed by a BEFORE INSERT trigger;
2196                          * this is the same definition used by execMain.c for counting
2197                          * tuples inserted by an INSERT command.
2198                          */
2199                         processed++;
2200                 }
2201         }
2202
2203         /* Flush any remaining buffered tuples */
2204         if (nBufferedTuples > 0)
2205                 CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2206                                                         resultRelInfo, myslot, bistate,
2207                                                         nBufferedTuples, bufferedTuples);
2208
2209         /* Done, clean up */
2210         error_context_stack = errcallback.previous;
2211
2212         FreeBulkInsertState(bistate);
2213
2214         MemoryContextSwitchTo(oldcontext);
2215
2216         /* Execute AFTER STATEMENT insertion triggers */
2217         ExecASInsertTriggers(estate, resultRelInfo);
2218
2219         /* Handle queued AFTER triggers */
2220         AfterTriggerEndQuery(estate);
2221
2222         pfree(values);
2223         pfree(nulls);
2224
2225         ExecResetTupleTable(estate->es_tupleTable, false);
2226
2227         ExecCloseIndices(resultRelInfo);
2228
2229         FreeExecutorState(estate);
2230
2231         /*
2232          * If we skipped writing WAL, then we need to sync the heap (but not
2233          * indexes since those use WAL anyway)
2234          */
2235         if (hi_options & HEAP_INSERT_SKIP_WAL)
2236                 heap_sync(cstate->rel);
2237
2238         return processed;
2239 }
2240
2241 /*
2242  * A subroutine of CopyFrom, to write the current batch of buffered heap
2243  * tuples to the heap. Also updates indexes and runs AFTER ROW INSERT
2244  * triggers.
2245  */
2246 static void
2247 CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
2248                                         int hi_options, ResultRelInfo *resultRelInfo,
2249                                         TupleTableSlot *myslot, BulkInsertState bistate,
2250                                         int nBufferedTuples, HeapTuple *bufferedTuples)
2251 {
2252         MemoryContext oldcontext;
2253         int                     i;
2254
2255         /*
2256          * heap_multi_insert leaks memory, so switch to short-lived memory context
2257          * before calling it.
2258          */
2259         oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2260         heap_multi_insert(cstate->rel,
2261                                           bufferedTuples,
2262                                           nBufferedTuples,
2263                                           mycid,
2264                                           hi_options,
2265                                           bistate);
2266         MemoryContextSwitchTo(oldcontext);
2267
2268         /*
2269          * If there are any indexes, update them for all the inserted tuples, and
2270          * run AFTER ROW INSERT triggers.
2271          */
2272         if (resultRelInfo->ri_NumIndices > 0)
2273         {
2274                 for (i = 0; i < nBufferedTuples; i++)
2275                 {
2276                         List       *recheckIndexes;
2277
2278                         ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
2279                         recheckIndexes =
2280                                 ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
2281                                                                           estate);
2282                         ExecARInsertTriggers(estate, resultRelInfo,
2283                                                                  bufferedTuples[i],
2284                                                                  recheckIndexes);
2285                         list_free(recheckIndexes);
2286                 }
2287         }
2288
2289         /*
2290          * There's no indexes, but see if we need to run AFTER ROW INSERT triggers
2291          * anyway.
2292          */
2293         else if (resultRelInfo->ri_TrigDesc != NULL &&
2294                          resultRelInfo->ri_TrigDesc->trig_insert_after_row)
2295         {
2296                 for (i = 0; i < nBufferedTuples; i++)
2297                         ExecARInsertTriggers(estate, resultRelInfo,
2298                                                                  bufferedTuples[i],
2299                                                                  NIL);
2300         }
2301 }
2302
2303 /*
2304  * Setup to read tuples from a file for COPY FROM.
2305  *
2306  * 'rel': Used as a template for the tuples
2307  * 'filename': Name of server-local file to read
2308  * 'attnamelist': List of char *, columns to include. NIL selects all cols.
2309  * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
2310  *
2311  * Returns a CopyState, to be passed to NextCopyFrom and related functions.
2312  */
2313 CopyState
2314 BeginCopyFrom(Relation rel,
2315                           const char *filename,
2316                           List *attnamelist,
2317                           List *options)
2318 {
2319         CopyState       cstate;
2320         bool            pipe = (filename == NULL);
2321         TupleDesc       tupDesc;
2322         Form_pg_attribute *attr;
2323         AttrNumber      num_phys_attrs,
2324                                 num_defaults;
2325         FmgrInfo   *in_functions;
2326         Oid                *typioparams;
2327         int                     attnum;
2328         Oid                     in_func_oid;
2329         int                *defmap;
2330         ExprState **defexprs;
2331         MemoryContext oldcontext;
2332         bool            volatile_defexprs;
2333
2334         cstate = BeginCopy(true, rel, NULL, NULL, attnamelist, options);
2335         oldcontext = MemoryContextSwitchTo(cstate->copycontext);
2336
2337         /* Initialize state variables */
2338         cstate->fe_eof = false;
2339         cstate->eol_type = EOL_UNKNOWN;
2340         cstate->cur_relname = RelationGetRelationName(cstate->rel);
2341         cstate->cur_lineno = 0;
2342         cstate->cur_attname = NULL;
2343         cstate->cur_attval = NULL;
2344
2345         /* Set up variables to avoid per-attribute overhead. */
2346         initStringInfo(&cstate->attribute_buf);
2347         initStringInfo(&cstate->line_buf);
2348         cstate->line_buf_converted = false;
2349         cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
2350         cstate->raw_buf_index = cstate->raw_buf_len = 0;
2351
2352         tupDesc = RelationGetDescr(cstate->rel);
2353         attr = tupDesc->attrs;
2354         num_phys_attrs = tupDesc->natts;
2355         num_defaults = 0;
2356         volatile_defexprs = false;
2357
2358         /*
2359          * Pick up the required catalog information for each attribute in the
2360          * relation, including the input function, the element type (to pass to
2361          * the input function), and info about defaults and constraints. (Which
2362          * input function we use depends on text/binary format choice.)
2363          */
2364         in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
2365         typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
2366         defmap = (int *) palloc(num_phys_attrs * sizeof(int));
2367         defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
2368
2369         for (attnum = 1; attnum <= num_phys_attrs; attnum++)
2370         {
2371                 /* We don't need info for dropped attributes */
2372                 if (attr[attnum - 1]->attisdropped)
2373                         continue;
2374
2375                 /* Fetch the input function and typioparam info */
2376                 if (cstate->binary)
2377                         getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
2378                                                                    &in_func_oid, &typioparams[attnum - 1]);
2379                 else
2380                         getTypeInputInfo(attr[attnum - 1]->atttypid,
2381                                                          &in_func_oid, &typioparams[attnum - 1]);
2382                 fmgr_info(in_func_oid, &in_functions[attnum - 1]);
2383
2384                 /* Get default info if needed */
2385                 if (!list_member_int(cstate->attnumlist, attnum))
2386                 {
2387                         /* attribute is NOT to be copied from input */
2388                         /* use default value if one exists */
2389                         Node       *defexpr = build_column_default(cstate->rel, attnum);
2390
2391                         if (defexpr != NULL)
2392                         {
2393                                 /* Initialize expressions in copycontext. */
2394                                 defexprs[num_defaults] = ExecInitExpr(
2395                                                                  expression_planner((Expr *) defexpr), NULL);
2396                                 defmap[num_defaults] = attnum - 1;
2397                                 num_defaults++;
2398
2399                                 if (!volatile_defexprs)
2400                                         volatile_defexprs = contain_volatile_functions(defexpr);
2401                         }
2402                 }
2403         }
2404
2405         /* We keep those variables in cstate. */
2406         cstate->in_functions = in_functions;
2407         cstate->typioparams = typioparams;
2408         cstate->defmap = defmap;
2409         cstate->defexprs = defexprs;
2410         cstate->volatile_defexprs = volatile_defexprs;
2411         cstate->num_defaults = num_defaults;
2412
2413         if (pipe)
2414         {
2415                 if (whereToSendOutput == DestRemote)
2416                         ReceiveCopyBegin(cstate);
2417                 else
2418                         cstate->copy_file = stdin;
2419         }
2420         else
2421         {
2422                 struct stat st;
2423
2424                 cstate->filename = pstrdup(filename);
2425                 cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
2426
2427                 if (cstate->copy_file == NULL)
2428                         ereport(ERROR,
2429                                         (errcode_for_file_access(),
2430                                          errmsg("could not open file \"%s\" for reading: %m",
2431                                                         cstate->filename)));
2432
2433                 fstat(fileno(cstate->copy_file), &st);
2434                 if (S_ISDIR(st.st_mode))
2435                         ereport(ERROR,
2436                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2437                                          errmsg("\"%s\" is a directory", cstate->filename)));
2438         }
2439
2440         if (!cstate->binary)
2441         {
2442                 /* must rely on user to tell us... */
2443                 cstate->file_has_oids = cstate->oids;
2444         }
2445         else
2446         {
2447                 /* Read and verify binary header */
2448                 char            readSig[11];
2449                 int32           tmp;
2450
2451                 /* Signature */
2452                 if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
2453                         memcmp(readSig, BinarySignature, 11) != 0)
2454                         ereport(ERROR,
2455                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2456                                          errmsg("COPY file signature not recognized")));
2457                 /* Flags field */
2458                 if (!CopyGetInt32(cstate, &tmp))
2459                         ereport(ERROR,
2460                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2461                                          errmsg("invalid COPY file header (missing flags)")));
2462                 cstate->file_has_oids = (tmp & (1 << 16)) != 0;
2463                 tmp &= ~(1 << 16);
2464                 if ((tmp >> 16) != 0)
2465                         ereport(ERROR,
2466                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2467                                  errmsg("unrecognized critical flags in COPY file header")));
2468                 /* Header extension length */
2469                 if (!CopyGetInt32(cstate, &tmp) ||
2470                         tmp < 0)
2471                         ereport(ERROR,
2472                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2473                                          errmsg("invalid COPY file header (missing length)")));
2474                 /* Skip extension header, if present */
2475                 while (tmp-- > 0)
2476                 {
2477                         if (CopyGetData(cstate, readSig, 1, 1) != 1)
2478                                 ereport(ERROR,
2479                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2480                                                  errmsg("invalid COPY file header (wrong length)")));
2481                 }
2482         }
2483
2484         if (cstate->file_has_oids && cstate->binary)
2485         {
2486                 getTypeBinaryInputInfo(OIDOID,
2487                                                            &in_func_oid, &cstate->oid_typioparam);
2488                 fmgr_info(in_func_oid, &cstate->oid_in_function);
2489         }
2490
2491         /* create workspace for CopyReadAttributes results */
2492         if (!cstate->binary)
2493         {
2494                 AttrNumber      attr_count = list_length(cstate->attnumlist);
2495                 int                     nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
2496
2497                 cstate->max_fields = nfields;
2498                 cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
2499         }
2500
2501         MemoryContextSwitchTo(oldcontext);
2502
2503         return cstate;
2504 }
2505
2506 /*
2507  * Read raw fields in the next line for COPY FROM in text or csv mode.
2508  * Return false if no more lines.
2509  *
2510  * An internal temporary buffer is returned via 'fields'. It is valid until
2511  * the next call of the function. Since the function returns all raw fields
2512  * in the input file, 'nfields' could be different from the number of columns
2513  * in the relation.
2514  *
2515  * NOTE: force_not_null option are not applied to the returned fields.
2516  */
2517 bool
2518 NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
2519 {
2520         int                     fldct;
2521         bool            done;
2522
2523         /* only available for text or csv input */
2524         Assert(!cstate->binary);
2525
2526         /* on input just throw the header line away */
2527         if (cstate->cur_lineno == 0 && cstate->header_line)
2528         {
2529                 cstate->cur_lineno++;
2530                 if (CopyReadLine(cstate))
2531                         return false;           /* done */
2532         }
2533
2534         cstate->cur_lineno++;
2535
2536         /* Actually read the line into memory here */
2537         done = CopyReadLine(cstate);
2538
2539         /*
2540          * EOF at start of line means we're done.  If we see EOF after some
2541          * characters, we act as though it was newline followed by EOF, ie,
2542          * process the line and then exit loop on next iteration.
2543          */
2544         if (done && cstate->line_buf.len == 0)
2545                 return false;
2546
2547         /* Parse the line into de-escaped field values */
2548         if (cstate->csv_mode)
2549                 fldct = CopyReadAttributesCSV(cstate);
2550         else
2551                 fldct = CopyReadAttributesText(cstate);
2552
2553         *fields = cstate->raw_fields;
2554         *nfields = fldct;
2555         return true;
2556 }
2557
2558 /*
2559  * Read next tuple from file for COPY FROM. Return false if no more tuples.
2560  *
2561  * 'econtext' is used to evaluate default expression for each columns not
2562  * read from the file. It can be NULL when no default values are used, i.e.
2563  * when all columns are read from the file.
2564  *
2565  * 'values' and 'nulls' arrays must be the same length as columns of the
2566  * relation passed to BeginCopyFrom. This function fills the arrays.
2567  * Oid of the tuple is returned with 'tupleOid' separately.
2568  */
2569 bool
2570 NextCopyFrom(CopyState cstate, ExprContext *econtext,
2571                          Datum *values, bool *nulls, Oid *tupleOid)
2572 {
2573         TupleDesc       tupDesc;
2574         Form_pg_attribute *attr;
2575         AttrNumber      num_phys_attrs,
2576                                 attr_count,
2577                                 num_defaults = cstate->num_defaults;
2578         FmgrInfo   *in_functions = cstate->in_functions;
2579         Oid                *typioparams = cstate->typioparams;
2580         int                     i;
2581         int                     nfields;
2582         bool            isnull;
2583         bool            file_has_oids = cstate->file_has_oids;
2584         int                *defmap = cstate->defmap;
2585         ExprState **defexprs = cstate->defexprs;
2586
2587         tupDesc = RelationGetDescr(cstate->rel);
2588         attr = tupDesc->attrs;
2589         num_phys_attrs = tupDesc->natts;
2590         attr_count = list_length(cstate->attnumlist);
2591         nfields = file_has_oids ? (attr_count + 1) : attr_count;
2592
2593         /* Initialize all values for row to NULL */
2594         MemSet(values, 0, num_phys_attrs * sizeof(Datum));
2595         MemSet(nulls, true, num_phys_attrs * sizeof(bool));
2596
2597         if (!cstate->binary)
2598         {
2599                 char      **field_strings;
2600                 ListCell   *cur;
2601                 int                     fldct;
2602                 int                     fieldno;
2603                 char       *string;
2604
2605                 /* read raw fields in the next line */
2606                 if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
2607                         return false;
2608
2609                 /* check for overflowing fields */
2610                 if (nfields > 0 && fldct > nfields)
2611                         ereport(ERROR,
2612                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2613                                          errmsg("extra data after last expected column")));
2614
2615                 fieldno = 0;
2616
2617                 /* Read the OID field if present */
2618                 if (file_has_oids)
2619                 {
2620                         if (fieldno >= fldct)
2621                                 ereport(ERROR,
2622                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2623                                                  errmsg("missing data for OID column")));
2624                         string = field_strings[fieldno++];
2625
2626                         if (string == NULL)
2627                                 ereport(ERROR,
2628                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2629                                                  errmsg("null OID in COPY data")));
2630                         else if (cstate->oids && tupleOid != NULL)
2631                         {
2632                                 cstate->cur_attname = "oid";
2633                                 cstate->cur_attval = string;
2634                                 *tupleOid = DatumGetObjectId(DirectFunctionCall1(oidin,
2635                                                                                                    CStringGetDatum(string)));
2636                                 if (*tupleOid == InvalidOid)
2637                                         ereport(ERROR,
2638                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2639                                                          errmsg("invalid OID in COPY data")));
2640                                 cstate->cur_attname = NULL;
2641                                 cstate->cur_attval = NULL;
2642                         }
2643                 }
2644
2645                 /* Loop to read the user attributes on the line. */
2646                 foreach(cur, cstate->attnumlist)
2647                 {
2648                         int                     attnum = lfirst_int(cur);
2649                         int                     m = attnum - 1;
2650
2651                         if (fieldno >= fldct)
2652                                 ereport(ERROR,
2653                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2654                                                  errmsg("missing data for column \"%s\"",
2655                                                                 NameStr(attr[m]->attname))));
2656                         string = field_strings[fieldno++];
2657
2658                         if (cstate->convert_select_flags &&
2659                                 !cstate->convert_select_flags[m])
2660                         {
2661                                 /* ignore input field, leaving column as NULL */
2662                                 continue;
2663                         }
2664
2665                         if (cstate->csv_mode && string == NULL &&
2666                                 cstate->force_notnull_flags[m])
2667                         {
2668                                 /* Go ahead and read the NULL string */
2669                                 string = cstate->null_print;
2670                         }
2671
2672                         cstate->cur_attname = NameStr(attr[m]->attname);
2673                         cstate->cur_attval = string;
2674                         values[m] = InputFunctionCall(&in_functions[m],
2675                                                                                   string,
2676                                                                                   typioparams[m],
2677                                                                                   attr[m]->atttypmod);
2678                         if (string != NULL)
2679                                 nulls[m] = false;
2680                         cstate->cur_attname = NULL;
2681                         cstate->cur_attval = NULL;
2682                 }
2683
2684                 Assert(fieldno == nfields);
2685         }
2686         else
2687         {
2688                 /* binary */
2689                 int16           fld_count;
2690                 ListCell   *cur;
2691
2692                 cstate->cur_lineno++;
2693
2694                 if (!CopyGetInt16(cstate, &fld_count))
2695                 {
2696                         /* EOF detected (end of file, or protocol-level EOF) */
2697                         return false;
2698                 }
2699
2700                 if (fld_count == -1)
2701                 {
2702                         /*
2703                          * Received EOF marker.  In a V3-protocol copy, wait for the
2704                          * protocol-level EOF, and complain if it doesn't come
2705                          * immediately.  This ensures that we correctly handle CopyFail,
2706                          * if client chooses to send that now.
2707                          *
2708                          * Note that we MUST NOT try to read more data in an old-protocol
2709                          * copy, since there is no protocol-level EOF marker then.      We
2710                          * could go either way for copy from file, but choose to throw
2711                          * error if there's data after the EOF marker, for consistency
2712                          * with the new-protocol case.
2713                          */
2714                         char            dummy;
2715
2716                         if (cstate->copy_dest != COPY_OLD_FE &&
2717                                 CopyGetData(cstate, &dummy, 1, 1) > 0)
2718                                 ereport(ERROR,
2719                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2720                                                  errmsg("received copy data after EOF marker")));
2721                         return false;
2722                 }
2723
2724                 if (fld_count != attr_count)
2725                         ereport(ERROR,
2726                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2727                                          errmsg("row field count is %d, expected %d",
2728                                                         (int) fld_count, attr_count)));
2729
2730                 if (file_has_oids)
2731                 {
2732                         Oid                     loaded_oid;
2733
2734                         cstate->cur_attname = "oid";
2735                         loaded_oid =
2736                                 DatumGetObjectId(CopyReadBinaryAttribute(cstate,
2737                                                                                                                  0,
2738                                                                                                         &cstate->oid_in_function,
2739                                                                                                           cstate->oid_typioparam,
2740                                                                                                                  -1,
2741                                                                                                                  &isnull));
2742                         if (isnull || loaded_oid == InvalidOid)
2743                                 ereport(ERROR,
2744                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2745                                                  errmsg("invalid OID in COPY data")));
2746                         cstate->cur_attname = NULL;
2747                         if (cstate->oids && tupleOid != NULL)
2748                                 *tupleOid = loaded_oid;
2749                 }
2750
2751                 i = 0;
2752                 foreach(cur, cstate->attnumlist)
2753                 {
2754                         int                     attnum = lfirst_int(cur);
2755                         int                     m = attnum - 1;
2756
2757                         cstate->cur_attname = NameStr(attr[m]->attname);
2758                         i++;
2759                         values[m] = CopyReadBinaryAttribute(cstate,
2760                                                                                                 i,
2761                                                                                                 &in_functions[m],
2762                                                                                                 typioparams[m],
2763                                                                                                 attr[m]->atttypmod,
2764                                                                                                 &nulls[m]);
2765                         cstate->cur_attname = NULL;
2766                 }
2767         }
2768
2769         /*
2770          * Now compute and insert any defaults available for the columns not
2771          * provided by the input data.  Anything not processed here or above will
2772          * remain NULL.
2773          */
2774         for (i = 0; i < num_defaults; i++)
2775         {
2776                 /*
2777                  * The caller must supply econtext and have switched into the
2778                  * per-tuple memory context in it.
2779                  */
2780                 Assert(econtext != NULL);
2781                 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
2782
2783                 values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
2784                                                                                  &nulls[defmap[i]], NULL);
2785         }
2786
2787         return true;
2788 }
2789
/*
 * EndCopyFrom - clean up storage and release resources for COPY FROM.
 *
 * cstate is the COPY FROM state being torn down.  This routine does no work
 * of its own; it simply hands cstate to EndCopy().
 */
void
EndCopyFrom(CopyState cstate)
{
        /*
         * No COPY FROM related resources except memory, so there is nothing
         * to release here before calling EndCopy().
         */

        EndCopy(cstate);
}
2800
2801 /*
2802  * Read the next input line and stash it in line_buf, with conversion to
2803  * server encoding.
2804  *
2805  * Result is true if read was terminated by EOF, false if terminated
2806  * by newline.  The terminating newline or EOF marker is not included
2807  * in the final value of line_buf.
2808  */
2809 static bool
2810 CopyReadLine(CopyState cstate)
2811 {
2812         bool            result;
2813
2814         resetStringInfo(&cstate->line_buf);
2815
2816         /* Mark that encoding conversion hasn't occurred yet */
2817         cstate->line_buf_converted = false;
2818
2819         /* Parse data and transfer into line_buf */
2820         result = CopyReadLineText(cstate);
2821
2822         if (result)
2823         {
2824                 /*
2825                  * Reached EOF.  In protocol version 3, we should ignore anything
2826                  * after \. up to the protocol end of copy data.  (XXX maybe better
2827                  * not to treat \. as special?)
2828                  */
2829                 if (cstate->copy_dest == COPY_NEW_FE)
2830                 {
2831                         do
2832                         {
2833                                 cstate->raw_buf_index = cstate->raw_buf_len;
2834                         } while (CopyLoadRawBuf(cstate));
2835                 }
2836         }
2837         else
2838         {
2839                 /*
2840                  * If we didn't hit EOF, then we must have transferred the EOL marker
2841                  * to line_buf along with the data.  Get rid of it.
2842                  */
2843                 switch (cstate->eol_type)
2844                 {
2845                         case EOL_NL:
2846                                 Assert(cstate->line_buf.len >= 1);
2847                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2848                                 cstate->line_buf.len--;
2849                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2850                                 break;
2851                         case EOL_CR:
2852                                 Assert(cstate->line_buf.len >= 1);
2853                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
2854                                 cstate->line_buf.len--;
2855                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2856                                 break;
2857                         case EOL_CRNL:
2858                                 Assert(cstate->line_buf.len >= 2);
2859                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
2860                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2861                                 cstate->line_buf.len -= 2;
2862                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2863                                 break;
2864                         case EOL_UNKNOWN:
2865                                 /* shouldn't get here */
2866                                 Assert(false);
2867                                 break;
2868                 }
2869         }
2870
2871         /* Done reading the line.  Convert it to server encoding. */
2872         if (cstate->need_transcoding)
2873         {
2874                 char       *cvt;
2875
2876                 cvt = pg_any_to_server(cstate->line_buf.data,
2877                                                            cstate->line_buf.len,
2878                                                            cstate->file_encoding);
2879                 if (cvt != cstate->line_buf.data)
2880                 {
2881                         /* transfer converted data back to line_buf */
2882                         resetStringInfo(&cstate->line_buf);
2883                         appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
2884                         pfree(cvt);
2885                 }
2886         }
2887
2888         /* Now it's safe to use the buffer in error messages */
2889         cstate->line_buf_converted = true;
2890
2891         return result;
2892 }
2893
/*
 * CopyReadLineText - inner loop of CopyReadLine for text mode
 *
 * Scans cstate->raw_buf for the end of the next input line, calling
 * CopyLoadRawBuf() whenever more raw data is needed, and moves the bytes of
 * that line into cstate->line_buf.
 *
 * Returns true if the scan ended because the input ran out or because an
 * end-of-copy marker (\.) was recognized; returns false if it ended at a
 * line terminator.
 *
 * Side effects visible in this function: cstate->eol_type is set the first
 * time a line terminator is seen, cstate->cur_lineno is bumped for line
 * breaks embedded in quoted CSV fields, and an ERROR is raised when a
 * newline or end-of-copy marker does not match the established newline
 * style.
 *
 * NOTE(review): the IF_NEED_REFILL_*, REFILL_LINEBUF and NO_END_OF_COPY_GOTO
 * macros used below are defined elsewhere in this file and contain hidden
 * control flow (continue/break/goto, judging by their names); read them
 * before changing the statement order here.
 */
static bool
CopyReadLineText(CopyState cstate)
{
        char       *copy_raw_buf;       /* local copy of cstate->raw_buf */
        int                     raw_buf_ptr;    /* index of next char to examine */
        int                     copy_buf_len;   /* local copy of cstate->raw_buf_len */
        bool            need_data = false;      /* forces a reload at loop top;
                                                                         * presumably set by the refill
                                                                         * macros -- TODO confirm */
        bool            hit_eof = false;        /* CopyLoadRawBuf() reported no more
                                                                         * data */
        bool            result = false; /* return value: true means EOF */
        char            mblen_str[2];   /* lead byte + NUL, handed to
                                                                 * pg_encoding_mblen() */

        /* CSV variables */
        bool            first_char_in_line = true;
        bool            in_quote = false,
                                last_was_esc = false;
        char            quotec = '\0';
        char            escapec = '\0';

        if (cstate->csv_mode)
        {
                quotec = cstate->quote[0];
                escapec = cstate->escape[0];
                /* ignore special escape processing if it's the same as quotec */
                if (quotec == escapec)
                        escapec = '\0';
        }

        mblen_str[1] = '\0';

        /*
         * The objective of this loop is to transfer the entire next input line
         * into line_buf.  Hence, we only care for detecting newlines (\r and/or
         * \n) and the end-of-copy marker (\.).
         *
         * In CSV mode, \r and \n inside a quoted field are just part of the data
         * value and are put in line_buf.  We keep just enough state to know if we
         * are currently in a quoted field or not.
         *
         * These four characters, and the CSV escape and quote characters, are
         * assumed the same in frontend and backend encodings.
         *
         * For speed, we try to move data from raw_buf to line_buf in chunks
         * rather than one character at a time.  raw_buf_ptr points to the next
         * character to examine; any characters from raw_buf_index to raw_buf_ptr
         * have been determined to be part of the line, but not yet transferred to
         * line_buf.
         *
         * For a little extra speed within the loop, we copy raw_buf and
         * raw_buf_len into local variables.
         */
        copy_raw_buf = cstate->raw_buf;
        raw_buf_ptr = cstate->raw_buf_index;
        copy_buf_len = cstate->raw_buf_len;

        for (;;)
        {
                int                     prev_raw_ptr;   /* position of the char fetched this
                                                                         * iteration */
                char            c;

                /*
                 * Load more data if needed.  Ideally we would just force four bytes
                 * of read-ahead and avoid the many calls to
                 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
                 * does not allow us to read too far ahead or we might read into the
                 * next data, so we read ahead only as far as we know we can.  One
                 * optimization would be to read ahead four bytes here if
                 * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
                 * considering the size of the buffer.
                 */
                if (raw_buf_ptr >= copy_buf_len || need_data)
                {
                        REFILL_LINEBUF;

                        /*
                         * Try to read some more data.  This will certainly reset
                         * raw_buf_index to zero, and raw_buf_ptr must go with it.
                         */
                        if (!CopyLoadRawBuf(cstate))
                                hit_eof = true;
                        raw_buf_ptr = 0;
                        copy_buf_len = cstate->raw_buf_len;

                        /*
                         * If we are completely out of data, break out of the loop,
                         * reporting EOF.
                         */
                        if (copy_buf_len <= 0)
                        {
                                result = true;
                                break;
                        }
                        need_data = false;
                }

                /* OK to fetch a character */
                prev_raw_ptr = raw_buf_ptr;
                c = copy_raw_buf[raw_buf_ptr++];

                if (cstate->csv_mode)
                {
                        /*
                         * If character is '\\' or '\r', we may need to look ahead below.
                         * Force fetch of the next character if we don't already have it.
                         * We need to do this before changing CSV state, in case one of
                         * these characters is also the quote or escape character.
                         *
                         * Note: old-protocol does not like forced prefetch, but it's OK
                         * here since we cannot validly be at EOF.
                         */
                        if (c == '\\' || c == '\r')
                        {
                                IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
                        }

                        /*
                         * Dealing with quotes and escapes here is mildly tricky. If the
                         * quote char is also the escape char, there's no problem - we
                         * just use the char as a toggle. If they are different, we need
                         * to ensure that we only take account of an escape inside a
                         * quoted field and immediately preceding a quote char, and not
                         * the second in an escape-escape sequence.
                         */
                        if (in_quote && c == escapec)
                                last_was_esc = !last_was_esc;
                        if (c == quotec && !last_was_esc)
                                in_quote = !in_quote;
                        if (c != escapec)
                                last_was_esc = false;

                        /*
                         * Updating the line count for embedded CR and/or LF chars is
                         * necessarily a little fragile - this test is probably about the
                         * best we can do.  (XXX it's arguable whether we should do this
                         * at all --- is cur_lineno a physical or logical count?)
                         */
                        if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
                                cstate->cur_lineno++;
                }

                /* Process \r */
                if (c == '\r' && (!cstate->csv_mode || !in_quote))
                {
                        /* Check for \r\n on first line, _and_ handle \r\n. */
                        if (cstate->eol_type == EOL_UNKNOWN ||
                                cstate->eol_type == EOL_CRNL)
                        {
                                /*
                                 * If need more data, go back to loop top to load it.
                                 *
                                 * Note that if we are at EOF, c will wind up as '\0' because
                                 * of the guaranteed pad of raw_buf.
                                 */
                                IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);

                                /* get next char */
                                c = copy_raw_buf[raw_buf_ptr];

                                if (c == '\n')
                                {
                                        raw_buf_ptr++;          /* eat newline */
                                        cstate->eol_type = EOL_CRNL;            /* in case not set yet */
                                }
                                else
                                {
                                        /* found \r, but no \n */
                                        if (cstate->eol_type == EOL_CRNL)
                                                ereport(ERROR,
                                                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                                 !cstate->csv_mode ?
                                                        errmsg("literal carriage return found in data") :
                                                        errmsg("unquoted carriage return found in data"),
                                                                 !cstate->csv_mode ?
                                                errhint("Use \"\\r\" to represent carriage return.") :
                                                                 errhint("Use quoted CSV field to represent carriage return.")));

                                        /*
                                         * if we got here, it is the first line and we didn't find
                                         * \n, so don't consume the peeked character
                                         */
                                        cstate->eol_type = EOL_CR;
                                }
                        }
                        else if (cstate->eol_type == EOL_NL)
                                ereport(ERROR,
                                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 !cstate->csv_mode ?
                                                 errmsg("literal carriage return found in data") :
                                                 errmsg("unquoted carriage return found in data"),
                                                 !cstate->csv_mode ?
                                           errhint("Use \"\\r\" to represent carriage return.") :
                                                 errhint("Use quoted CSV field to represent carriage return.")));
                        /* If reach here, we have found the line terminator */
                        break;
                }

                /* Process \n */
                if (c == '\n' && (!cstate->csv_mode || !in_quote))
                {
                        /* A bare \n is not acceptable once \r or \r\n has been seen */
                        if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
                                ereport(ERROR,
                                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 !cstate->csv_mode ?
                                                 errmsg("literal newline found in data") :
                                                 errmsg("unquoted newline found in data"),
                                                 !cstate->csv_mode ?
                                                 errhint("Use \"\\n\" to represent newline.") :
                                         errhint("Use quoted CSV field to represent newline.")));
                        cstate->eol_type = EOL_NL;      /* in case not set yet */
                        /* If reach here, we have found the line terminator */
                        break;
                }

                /*
                 * In CSV mode, we only recognize \. alone on a line.  This is because
                 * \. is a valid CSV data value.
                 */
                if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
                {
                        char            c2;

                        IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
                        IF_NEED_REFILL_AND_EOF_BREAK(0);

                        /* -----
                         * get next character
                         * Note: we do not change c so if it isn't \., we can fall
                         * through and continue processing for file encoding.
                         * -----
                         */
                        c2 = copy_raw_buf[raw_buf_ptr];

                        if (c2 == '.')
                        {
                                raw_buf_ptr++;  /* consume the '.' */

                                /*
                                 * Note: if we loop back for more data here, it does not
                                 * matter that the CSV state change checks are re-executed; we
                                 * will come back here with no important state changed.
                                 */
                                if (cstate->eol_type == EOL_CRNL)
                                {
                                        /* Get the next character */
                                        IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
                                        /* if hit_eof, c2 will become '\0' */
                                        c2 = copy_raw_buf[raw_buf_ptr++];

                                        if (c2 == '\n')
                                        {
                                                if (!cstate->csv_mode)
                                                        ereport(ERROR,
                                                                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                                         errmsg("end-of-copy marker does not match previous newline style")));
                                                else
                                                        NO_END_OF_COPY_GOTO;
                                        }
                                        else if (c2 != '\r')
                                        {
                                                if (!cstate->csv_mode)
                                                        ereport(ERROR,
                                                                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                                         errmsg("end-of-copy marker corrupt")));
                                                else
                                                        NO_END_OF_COPY_GOTO;
                                        }
                                }

                                /* Get the next character */
                                IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
                                /* if hit_eof, c2 will become '\0' */
                                c2 = copy_raw_buf[raw_buf_ptr++];

                                if (c2 != '\r' && c2 != '\n')
                                {
                                        if (!cstate->csv_mode)
                                                ereport(ERROR,
                                                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                                 errmsg("end-of-copy marker corrupt")));
                                        else
                                                NO_END_OF_COPY_GOTO;
                                }

                                /* The marker's terminator must agree with the known style */
                                if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
                                        (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
                                        (cstate->eol_type == EOL_CR && c2 != '\r'))
                                {
                                        ereport(ERROR,
                                                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                         errmsg("end-of-copy marker does not match previous newline style")));
                                }

                                /*
                                 * Transfer only the data before the \. into line_buf, then
                                 * discard the data and the \. sequence.
                                 */
                                if (prev_raw_ptr > cstate->raw_buf_index)
                                        appendBinaryStringInfo(&cstate->line_buf,
                                                                         cstate->raw_buf + cstate->raw_buf_index,
                                                                           prev_raw_ptr - cstate->raw_buf_index);
                                cstate->raw_buf_index = raw_buf_ptr;
                                result = true;  /* report EOF */
                                break;
                        }
                        else if (!cstate->csv_mode)

                                /*
                                 * If we are here, it means we found a backslash followed by
                                 * something other than a period.  In non-CSV mode, anything
                                 * after a backslash is special, so we skip over that second
                                 * character too.  If we didn't do that, \\. would be
                                 * considered an end-of-copy marker, while in non-CSV mode it
                                 * is a literal backslash followed by a period.  In CSV mode,
                                 * backslashes are not special, so we want to process the
                                 * character after the backslash just like a normal character,
                                 * so we don't increment in those cases.
                                 */
                                raw_buf_ptr++;
                }

                /*
                 * This label is for CSV cases where \. appears at the start of a
                 * line, but there is more text after it, meaning it was a data value.
                 * We are more strict for \. in CSV mode because \. could be a data
                 * value, while in non-CSV mode, \. cannot be a data value.
                 */
not_end_of_copy:

                /*
                 * Process all bytes of a multi-byte character as a group.
                 *
                 * We only support multi-byte sequences where the first byte has the
                 * high-bit set, so as an optimization we can avoid this block
                 * entirely if it is not set.
                 */
                if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
                {
                        int                     mblen;

                        mblen_str[0] = c;
                        /* All our encodings only read the first byte to get the length */
                        mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
                        IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
                        IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
                        /* skip the trailing bytes so they are never examined alone */
                        raw_buf_ptr += mblen - 1;
                }
                first_char_in_line = false;
        }                                                       /* end of outer loop */

        /*
         * Transfer any still-uncopied data to line_buf.
         */
        REFILL_LINEBUF;

        return result;
}
3252
/*
 * GetDecimalFromHex
 *		Convert a single hexadecimal digit character to its numeric value.
 *
 * A decimal digit yields its distance from '0'.  Anything else is
 * lower-cased and measured from 'a', so 'a'/'A' give 10, 'b'/'B' give 11,
 * and so on.  The argument is not validated here; callers are expected to
 * pass only characters that are hexadecimal digits.
 */
static int
GetDecimalFromHex(char hex)
{
	unsigned char digit = (unsigned char) hex;

	return isdigit(digit) ? hex - '0' : tolower(digit) - 'a' + 10;
}
3264
3265 /*
3266  * Parse the current line into separate attributes (fields),
3267  * performing de-escaping as needed.
3268  *
3269  * The input is in line_buf.  We use attribute_buf to hold the result
3270  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
3271  * string, or NULL when the input matches the null marker string.
3272  * This array is expanded as necessary.
3273  *
3274  * (Note that the caller cannot check for nulls since the returned
3275  * string would be the post-de-escaping equivalent, which may look
3276  * the same as some valid data string.)
3277  *
3278  * delim is the column delimiter string (must be just one byte for now).
3279  * null_print is the null marker string.  Note that this is compared to
3280  * the pre-de-escaped input string.
3281  *
3282  * The return value is the number of fields actually read.
3283  */
3284 static int
3285 CopyReadAttributesText(CopyState cstate)
3286 {
3287         char            delimc = cstate->delim[0];
3288         int                     fieldno;
3289         char       *output_ptr;
3290         char       *cur_ptr;
3291         char       *line_end_ptr;
3292
3293         /*
3294          * We need a special case for zero-column tables: check that the input
3295          * line is empty, and return.
3296          */
3297         if (cstate->max_fields <= 0)
3298         {
3299                 if (cstate->line_buf.len != 0)
3300                         ereport(ERROR,
3301                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3302                                          errmsg("extra data after last expected column")));
3303                 return 0;
3304         }
3305
3306         resetStringInfo(&cstate->attribute_buf);
3307
3308         /*
3309          * The de-escaped attributes will certainly not be longer than the input
3310          * data line, so we can just force attribute_buf to be large enough and
3311          * then transfer data without any checks for enough space.      We need to do
3312          * it this way because enlarging attribute_buf mid-stream would invalidate
3313          * pointers already stored into cstate->raw_fields[].
3314          */
3315         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3316                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3317         output_ptr = cstate->attribute_buf.data;
3318
3319         /* set pointer variables for loop */
3320         cur_ptr = cstate->line_buf.data;
3321         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3322
3323         /* Outer loop iterates over fields */
3324         fieldno = 0;
3325         for (;;)
3326         {
3327                 bool            found_delim = false;
3328                 char       *start_ptr;
3329                 char       *end_ptr;
3330                 int                     input_len;
3331                 bool            saw_non_ascii = false;
3332
3333                 /* Make sure there is enough space for the next value */
3334                 if (fieldno >= cstate->max_fields)
3335                 {
3336                         cstate->max_fields *= 2;
3337                         cstate->raw_fields =
3338                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3339                 }
3340
3341                 /* Remember start of field on both input and output sides */
3342                 start_ptr = cur_ptr;
3343                 cstate->raw_fields[fieldno] = output_ptr;
3344
3345                 /*
3346                  * Scan data for field.
3347                  *
3348                  * Note that in this loop, we are scanning to locate the end of field
3349                  * and also speculatively performing de-escaping.  Once we find the
3350                  * end-of-field, we can match the raw field contents against the null
3351                  * marker string.  Only after that comparison fails do we know that
3352                  * de-escaping is actually the right thing to do; therefore we *must
3353                  * not* throw any syntax errors before we've done the null-marker
3354                  * check.
3355                  */
3356                 for (;;)
3357                 {
3358                         char            c;
3359
3360                         end_ptr = cur_ptr;
3361                         if (cur_ptr >= line_end_ptr)
3362                                 break;
3363                         c = *cur_ptr++;
3364                         if (c == delimc)
3365                         {
3366                                 found_delim = true;
3367                                 break;
3368                         }
3369                         if (c == '\\')
3370                         {
3371                                 if (cur_ptr >= line_end_ptr)
3372                                         break;
3373                                 c = *cur_ptr++;
3374                                 switch (c)
3375                                 {
3376                                         case '0':
3377                                         case '1':
3378                                         case '2':
3379                                         case '3':
3380                                         case '4':
3381                                         case '5':
3382                                         case '6':
3383                                         case '7':
3384                                                 {
3385                                                         /* handle \013 */
3386                                                         int                     val;
3387
3388                                                         val = OCTVALUE(c);
3389                                                         if (cur_ptr < line_end_ptr)
3390                                                         {
3391                                                                 c = *cur_ptr;
3392                                                                 if (ISOCTAL(c))
3393                                                                 {
3394                                                                         cur_ptr++;
3395                                                                         val = (val << 3) + OCTVALUE(c);
3396                                                                         if (cur_ptr < line_end_ptr)
3397                                                                         {
3398                                                                                 c = *cur_ptr;
3399                                                                                 if (ISOCTAL(c))
3400                                                                                 {
3401                                                                                         cur_ptr++;
3402                                                                                         val = (val << 3) + OCTVALUE(c);
3403                                                                                 }
3404                                                                         }
3405                                                                 }
3406                                                         }
3407                                                         c = val & 0377;
3408                                                         if (c == '\0' || IS_HIGHBIT_SET(c))
3409                                                                 saw_non_ascii = true;
3410                                                 }
3411                                                 break;
3412                                         case 'x':
3413                                                 /* Handle \x3F */
3414                                                 if (cur_ptr < line_end_ptr)
3415                                                 {
3416                                                         char            hexchar = *cur_ptr;
3417
3418                                                         if (isxdigit((unsigned char) hexchar))
3419                                                         {
3420                                                                 int                     val = GetDecimalFromHex(hexchar);
3421
3422                                                                 cur_ptr++;
3423                                                                 if (cur_ptr < line_end_ptr)
3424                                                                 {
3425                                                                         hexchar = *cur_ptr;
3426                                                                         if (isxdigit((unsigned char) hexchar))
3427                                                                         {
3428                                                                                 cur_ptr++;
3429                                                                                 val = (val << 4) + GetDecimalFromHex(hexchar);
3430                                                                         }
3431                                                                 }
3432                                                                 c = val & 0xff;
3433                                                                 if (c == '\0' || IS_HIGHBIT_SET(c))
3434                                                                         saw_non_ascii = true;
3435                                                         }
3436                                                 }
3437                                                 break;
3438                                         case 'b':
3439                                                 c = '\b';
3440                                                 break;
3441                                         case 'f':
3442                                                 c = '\f';
3443                                                 break;
3444                                         case 'n':
3445                                                 c = '\n';
3446                                                 break;
3447                                         case 'r':
3448                                                 c = '\r';
3449                                                 break;
3450                                         case 't':
3451                                                 c = '\t';
3452                                                 break;
3453                                         case 'v':
3454                                                 c = '\v';
3455                                                 break;
3456
3457                                                 /*
3458                                                  * in all other cases, take the char after '\'
3459                                                  * literally
3460                                                  */
3461                                 }
3462                         }
3463
3464                         /* Add c to output string */
3465                         *output_ptr++ = c;
3466                 }
3467
3468                 /* Check whether raw input matched null marker */
3469                 input_len = end_ptr - start_ptr;
3470                 if (input_len == cstate->null_print_len &&
3471                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3472                         cstate->raw_fields[fieldno] = NULL;
3473                 else
3474                 {
3475                         /*
3476                          * At this point we know the field is supposed to contain data.
3477                          *
3478                          * If we de-escaped any non-7-bit-ASCII chars, make sure the
3479                          * resulting string is valid data for the db encoding.
3480                          */
3481                         if (saw_non_ascii)
3482                         {
3483                                 char       *fld = cstate->raw_fields[fieldno];
3484
3485                                 pg_verifymbstr(fld, output_ptr - fld, false);
3486                         }
3487                 }
3488
3489                 /* Terminate attribute value in output area */
3490                 *output_ptr++ = '\0';
3491
3492                 fieldno++;
3493                 /* Done if we hit EOL instead of a delim */
3494                 if (!found_delim)
3495                         break;
3496         }
3497
3498         /* Clean up state of attribute_buf */
3499         output_ptr--;
3500         Assert(*output_ptr == '\0');
3501         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3502
3503         return fieldno;
3504 }
3505
3506 /*
3507  * Parse the current line into separate attributes (fields),
3508  * performing de-escaping as needed.  This has exactly the same API as
3509  * CopyReadAttributesText, except we parse the fields according to
3510  * "standard" (i.e. common) CSV usage.
3511  */
3512 static int
3513 CopyReadAttributesCSV(CopyState cstate)
3514 {
3515         char            delimc = cstate->delim[0];
3516         char            quotec = cstate->quote[0];
3517         char            escapec = cstate->escape[0];
3518         int                     fieldno;
3519         char       *output_ptr;
3520         char       *cur_ptr;
3521         char       *line_end_ptr;
3522
3523         /*
3524          * We need a special case for zero-column tables: check that the input
3525          * line is empty, and return.
3526          */
3527         if (cstate->max_fields <= 0)
3528         {
3529                 if (cstate->line_buf.len != 0)
3530                         ereport(ERROR,
3531                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3532                                          errmsg("extra data after last expected column")));
3533                 return 0;
3534         }
3535
3536         resetStringInfo(&cstate->attribute_buf);
3537
3538         /*
3539          * The de-escaped attributes will certainly not be longer than the input
3540          * data line, so we can just force attribute_buf to be large enough and
3541          * then transfer data without any checks for enough space.      We need to do
3542          * it this way because enlarging attribute_buf mid-stream would invalidate
3543          * pointers already stored into cstate->raw_fields[].
3544          */
3545         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
3546                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
3547         output_ptr = cstate->attribute_buf.data;
3548
3549         /* set pointer variables for loop */
3550         cur_ptr = cstate->line_buf.data;
3551         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
3552
3553         /* Outer loop iterates over fields */
3554         fieldno = 0;
3555         for (;;)
3556         {
3557                 bool            found_delim = false;
3558                 bool            saw_quote = false;
3559                 char       *start_ptr;
3560                 char       *end_ptr;
3561                 int                     input_len;
3562
3563                 /* Make sure there is enough space for the next value */
3564                 if (fieldno >= cstate->max_fields)
3565                 {
3566                         cstate->max_fields *= 2;
3567                         cstate->raw_fields =
3568                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
3569                 }
3570
3571                 /* Remember start of field on both input and output sides */
3572                 start_ptr = cur_ptr;
3573                 cstate->raw_fields[fieldno] = output_ptr;
3574
3575                 /*
3576                  * Scan data for field,
3577                  *
3578                  * The loop starts in "not quote" mode and then toggles between that
3579                  * and "in quote" mode. The loop exits normally if it is in "not
3580                  * quote" mode and a delimiter or line end is seen.
3581                  */
3582                 for (;;)
3583                 {
3584                         char            c;
3585
3586                         /* Not in quote */
3587                         for (;;)
3588                         {
3589                                 end_ptr = cur_ptr;
3590                                 if (cur_ptr >= line_end_ptr)
3591                                         goto endfield;
3592                                 c = *cur_ptr++;
3593                                 /* unquoted field delimiter */
3594                                 if (c == delimc)
3595                                 {
3596                                         found_delim = true;
3597                                         goto endfield;
3598                                 }
3599                                 /* start of quoted field (or part of field) */
3600                                 if (c == quotec)
3601                                 {
3602                                         saw_quote = true;
3603                                         break;
3604                                 }
3605                                 /* Add c to output string */
3606                                 *output_ptr++ = c;
3607                         }
3608
3609                         /* In quote */
3610                         for (;;)
3611                         {
3612                                 end_ptr = cur_ptr;
3613                                 if (cur_ptr >= line_end_ptr)
3614                                         ereport(ERROR,
3615                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3616                                                          errmsg("unterminated CSV quoted field")));
3617
3618                                 c = *cur_ptr++;
3619
3620                                 /* escape within a quoted field */
3621                                 if (c == escapec)
3622                                 {
3623                                         /*
3624                                          * peek at the next char if available, and escape it if it
3625                                          * is an escape char or a quote char
3626                                          */
3627                                         if (cur_ptr < line_end_ptr)
3628                                         {
3629                                                 char            nextc = *cur_ptr;
3630
3631                                                 if (nextc == escapec || nextc == quotec)
3632                                                 {
3633                                                         *output_ptr++ = nextc;
3634                                                         cur_ptr++;
3635                                                         continue;
3636                                                 }
3637                                         }
3638                                 }
3639
3640                                 /*
3641                                  * end of quoted field. Must do this test after testing for
3642                                  * escape in case quote char and escape char are the same
3643                                  * (which is the common case).
3644                                  */
3645                                 if (c == quotec)
3646                                         break;
3647
3648                                 /* Add c to output string */
3649                                 *output_ptr++ = c;
3650                         }
3651                 }
3652 endfield:
3653
3654                 /* Terminate attribute value in output area */
3655                 *output_ptr++ = '\0';
3656
3657                 /* Check whether raw input matched null marker */
3658                 input_len = end_ptr - start_ptr;
3659                 if (!saw_quote && input_len == cstate->null_print_len &&
3660                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
3661                         cstate->raw_fields[fieldno] = NULL;
3662
3663                 fieldno++;
3664                 /* Done if we hit EOL instead of a delim */
3665                 if (!found_delim)
3666                         break;
3667         }
3668
3669         /* Clean up state of attribute_buf */
3670         output_ptr--;
3671         Assert(*output_ptr == '\0');
3672         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
3673
3674         return fieldno;
3675 }
3676
3677
3678 /*
3679  * Read a binary attribute
3680  */
3681 static Datum
3682 CopyReadBinaryAttribute(CopyState cstate,
3683                                                 int column_no, FmgrInfo *flinfo,
3684                                                 Oid typioparam, int32 typmod,
3685                                                 bool *isnull)
3686 {
3687         int32           fld_size;
3688         Datum           result;
3689
3690         if (!CopyGetInt32(cstate, &fld_size))
3691                 ereport(ERROR,
3692                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3693                                  errmsg("unexpected EOF in COPY data")));
3694         if (fld_size == -1)
3695         {
3696                 *isnull = true;
3697                 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
3698         }
3699         if (fld_size < 0)
3700                 ereport(ERROR,
3701                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3702                                  errmsg("invalid field size")));
3703
3704         /* reset attribute_buf to empty, and load raw data in it */
3705         resetStringInfo(&cstate->attribute_buf);
3706
3707         enlargeStringInfo(&cstate->attribute_buf, fld_size);
3708         if (CopyGetData(cstate, cstate->attribute_buf.data,
3709                                         fld_size, fld_size) != fld_size)
3710                 ereport(ERROR,
3711                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3712                                  errmsg("unexpected EOF in COPY data")));
3713
3714         cstate->attribute_buf.len = fld_size;
3715         cstate->attribute_buf.data[fld_size] = '\0';
3716
3717         /* Call the column type's binary input converter */
3718         result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
3719                                                                  typioparam, typmod);
3720
3721         /* Trouble if it didn't eat the whole buffer */
3722         if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
3723                 ereport(ERROR,
3724                                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
3725                                  errmsg("incorrect binary data format")));
3726
3727         *isnull = false;
3728         return result;
3729 }
3730
3731 /*
3732  * Send text representation of one attribute, with conversion and escaping
3733  */
3734 #define DUMPSOFAR() \
3735         do { \
3736                 if (ptr > start) \
3737                         CopySendData(cstate, start, ptr - start); \
3738         } while (0)
3739
3740 static void
3741 CopyAttributeOutText(CopyState cstate, char *string)
3742 {
3743         char       *ptr;
3744         char       *start;
3745         char            c;
3746         char            delimc = cstate->delim[0];
3747
3748         if (cstate->need_transcoding)
3749                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
3750         else
3751                 ptr = string;
3752
3753         /*
3754          * We have to grovel through the string searching for control characters
3755          * and instances of the delimiter character.  In most cases, though, these
3756          * are infrequent.      To avoid overhead from calling CopySendData once per
3757          * character, we dump out all characters between escaped characters in a
3758          * single call.  The loop invariant is that the data from "start" to "ptr"
3759          * can be sent literally, but hasn't yet been.
3760          *
3761          * We can skip pg_encoding_mblen() overhead when encoding is safe, because
3762          * in valid backend encodings, extra bytes of a multibyte character never
3763          * look like ASCII.  This loop is sufficiently performance-critical that
3764          * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
3765          * of the normal safe-encoding path.
3766          */
3767         if (cstate->encoding_embeds_ascii)
3768         {
3769                 start = ptr;
3770                 while ((c = *ptr) != '\0')
3771                 {
3772                         if ((unsigned char) c < (unsigned char) 0x20)
3773                         {
3774                                 /*
3775                                  * \r and \n must be escaped, the others are traditional. We
3776                                  * prefer to dump these using the C-like notation, rather than
3777                                  * a backslash and the literal character, because it makes the
3778                                  * dump file a bit more proof against Microsoftish data
3779                                  * mangling.
3780                                  */
3781                                 switch (c)
3782                                 {
3783                                         case '\b':
3784                                                 c = 'b';
3785                                                 break;
3786                                         case '\f':
3787                                                 c = 'f';
3788                                                 break;
3789                                         case '\n':
3790                                                 c = 'n';
3791                                                 break;
3792                                         case '\r':
3793                                                 c = 'r';
3794                                                 break;
3795                                         case '\t':
3796                                                 c = 't';
3797                                                 break;
3798                                         case '\v':
3799                                                 c = 'v';
3800                                                 break;
3801                                         default:
3802                                                 /* If it's the delimiter, must backslash it */
3803                                                 if (c == delimc)
3804                                                         break;
3805                                                 /* All ASCII control chars are length 1 */
3806                                                 ptr++;
3807                                                 continue;               /* fall to end of loop */
3808                                 }
3809                                 /* if we get here, we need to convert the control char */
3810                                 DUMPSOFAR();
3811                                 CopySendChar(cstate, '\\');
3812                                 CopySendChar(cstate, c);
3813                                 start = ++ptr;  /* do not include char in next run */
3814                         }
3815                         else if (c == '\\' || c == delimc)
3816                         {
3817                                 DUMPSOFAR();
3818                                 CopySendChar(cstate, '\\');
3819                                 start = ptr++;  /* we include char in next run */
3820                         }
3821                         else if (IS_HIGHBIT_SET(c))
3822                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
3823                         else
3824                                 ptr++;
3825                 }
3826         }
3827         else
3828         {
3829                 start = ptr;
3830                 while ((c = *ptr) != '\0')
3831                 {
3832                         if ((unsigned char) c < (unsigned char) 0x20)
3833                         {
3834                                 /*
3835                                  * \r and \n must be escaped, the others are traditional. We
3836                                  * prefer to dump these using the C-like notation, rather than
3837                                  * a backslash and the literal character, because it makes the
3838                                  * dump file a bit more proof against Microsoftish data
3839                                  * mangling.
3840                                  */
3841                                 switch (c)
3842                                 {
3843                                         case '\b':
3844                                                 c = 'b';
3845                                                 break;
3846                                         case '\f':
3847                                                 c = 'f';
3848                                                 break;
3849                                         case '\n':
3850                                                 c = 'n';
3851                                                 break;
3852                                         case '\r':
3853                                                 c = 'r';
3854                                                 break;
3855                                         case '\t':
3856                                                 c = 't';
3857                                                 break;
3858                                         case '\v':
3859                                                 c = 'v';
3860                                                 break;
3861                                         default:
3862                                                 /* If it's the delimiter, must backslash it */
3863                                                 if (c == delimc)
3864                                                         break;
3865                                                 /* All ASCII control chars are length 1 */
3866                                                 ptr++;
3867                                                 continue;               /* fall to end of loop */
3868                                 }
3869                                 /* if we get here, we need to convert the control char */
3870                                 DUMPSOFAR();
3871                                 CopySendChar(cstate, '\\');
3872                                 CopySendChar(cstate, c);
3873                                 start = ++ptr;  /* do not include char in next run */
3874                         }
3875                         else if (c == '\\' || c == delimc)
3876                         {
3877                                 DUMPSOFAR();
3878                                 CopySendChar(cstate, '\\');
3879                                 start = ptr++;  /* we include char in next run */
3880                         }
3881                         else
3882                                 ptr++;
3883                 }
3884         }
3885
3886         DUMPSOFAR();
3887 }
3888
3889 /*
3890  * Send text representation of one attribute, with conversion and
3891  * CSV-style escaping
3892  */
3893 static void
3894 CopyAttributeOutCSV(CopyState cstate, char *string,
3895                                         bool use_quote, bool single_attr)
3896 {
3897         char       *ptr;
3898         char       *start;
3899         char            c;
3900         char            delimc = cstate->delim[0];
3901         char            quotec = cstate->quote[0];
3902         char            escapec = cstate->escape[0];
3903
3904         /* force quoting if it matches null_print (before conversion!) */
3905         if (!use_quote && strcmp(string, cstate->null_print) == 0)
3906                 use_quote = true;
3907
3908         if (cstate->need_transcoding)
3909                 ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
3910         else
3911                 ptr = string;
3912
3913         /*
3914          * Make a preliminary pass to discover if it needs quoting
3915          */
3916         if (!use_quote)
3917         {
3918                 /*
3919                  * Because '\.' can be a data value, quote it if it appears alone on a
3920                  * line so it is not interpreted as the end-of-data marker.
3921                  */
3922                 if (single_attr && strcmp(ptr, "\\.") == 0)
3923                         use_quote = true;
3924                 else
3925                 {
3926                         char       *tptr = ptr;
3927
3928                         while ((c = *tptr) != '\0')
3929                         {
3930                                 if (c == delimc || c == quotec || c == '\n' || c == '\r')
3931                                 {
3932                                         use_quote = true;
3933                                         break;
3934                                 }
3935                                 if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3936                                         tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
3937                                 else
3938                                         tptr++;
3939                         }
3940                 }
3941         }
3942
3943         if (use_quote)
3944         {
3945                 CopySendChar(cstate, quotec);
3946
3947                 /*
3948                  * We adopt the same optimization strategy as in CopyAttributeOutText
3949                  */
3950                 start = ptr;
3951                 while ((c = *ptr) != '\0')
3952                 {
3953                         if (c == quotec || c == escapec)
3954                         {
3955                                 DUMPSOFAR();
3956                                 CopySendChar(cstate, escapec);
3957                                 start = ptr;    /* we include char in next run */
3958                         }
3959                         if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3960                                 ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
3961                         else
3962                                 ptr++;
3963                 }
3964                 DUMPSOFAR();
3965
3966                 CopySendChar(cstate, quotec);
3967         }
3968         else
3969         {
3970                 /* If it doesn't need quoting, we can just dump it as-is */
3971                 CopySendString(cstate, ptr);
3972         }
3973 }
3974
3975 /*
3976  * CopyGetAttnums - build an integer list of attnums to be copied
3977  *
3978  * The input attnamelist is either the user-specified column list,
3979  * or NIL if there was none (in which case we want all the non-dropped
3980  * columns).
3981  *
3982  * rel can be NULL ... it's only used for error reports.
3983  */
3984 static List *
3985 CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
3986 {
3987         List       *attnums = NIL;
3988
3989         if (attnamelist == NIL)
3990         {
3991                 /* Generate default column list */
3992                 Form_pg_attribute *attr = tupDesc->attrs;
3993                 int                     attr_count = tupDesc->natts;
3994                 int                     i;
3995
3996                 for (i = 0; i < attr_count; i++)
3997                 {
3998                         if (attr[i]->attisdropped)
3999                                 continue;
4000                         attnums = lappend_int(attnums, i + 1);
4001                 }
4002         }
4003         else
4004         {
4005                 /* Validate the user-supplied list and extract attnums */
4006                 ListCell   *l;
4007
4008                 foreach(l, attnamelist)
4009                 {
4010                         char       *name = strVal(lfirst(l));
4011                         int                     attnum;
4012                         int                     i;
4013
4014                         /* Lookup column name */
4015                         attnum = InvalidAttrNumber;
4016                         for (i = 0; i < tupDesc->natts; i++)
4017                         {
4018                                 if (tupDesc->attrs[i]->attisdropped)
4019                                         continue;
4020                                 if (namestrcmp(&(tupDesc->attrs[i]->attname), name) == 0)
4021                                 {
4022                                         attnum = tupDesc->attrs[i]->attnum;
4023                                         break;
4024                                 }
4025                         }
4026                         if (attnum == InvalidAttrNumber)
4027                         {
4028                                 if (rel != NULL)
4029                                         ereport(ERROR,
4030                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4031                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
4032                                                    name, RelationGetRelationName(rel))));
4033                                 else
4034                                         ereport(ERROR,
4035                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
4036                                                          errmsg("column \"%s\" does not exist",
4037                                                                         name)));
4038                         }
4039                         /* Check for duplicates */
4040                         if (list_member_int(attnums, attnum))
4041                                 ereport(ERROR,
4042                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
4043                                                  errmsg("column \"%s\" specified more than once",
4044                                                                 name)));
4045                         attnums = lappend_int(attnums, attnum);
4046                 }
4047         }
4048
4049         return attnums;
4050 }
4051
4052
4053 /*
4054  * copy_dest_startup --- executor startup
4055  */
4056 static void
4057 copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
4058 {
4059         /* no-op */
4060 }
4061
4062 /*
4063  * copy_dest_receive --- receive one tuple
4064  */
4065 static void
4066 copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
4067 {
4068         DR_copy    *myState = (DR_copy *) self;
4069         CopyState       cstate = myState->cstate;
4070
4071         /* Make sure the tuple is fully deconstructed */
4072         slot_getallattrs(slot);
4073
4074         /* And send the data */
4075         CopyOneRowTo(cstate, InvalidOid, slot->tts_values, slot->tts_isnull);
4076         myState->processed++;
4077 }
4078
4079 /*
4080  * copy_dest_shutdown --- executor end
4081  */
4082 static void
4083 copy_dest_shutdown(DestReceiver *self)
4084 {
4085         /* no-op */
4086 }
4087
4088 /*
4089  * copy_dest_destroy --- release DestReceiver object
4090  */
4091 static void
4092 copy_dest_destroy(DestReceiver *self)
4093 {
4094         pfree(self);
4095 }
4096
4097 /*
4098  * CreateCopyDestReceiver -- create a suitable DestReceiver object
4099  */
4100 DestReceiver *
4101 CreateCopyDestReceiver(void)
4102 {
4103         DR_copy    *self = (DR_copy *) palloc(sizeof(DR_copy));
4104
4105         self->pub.receiveSlot = copy_dest_receive;
4106         self->pub.rStartup = copy_dest_startup;
4107         self->pub.rShutdown = copy_dest_shutdown;
4108         self->pub.rDestroy = copy_dest_destroy;
4109         self->pub.mydest = DestCopyOut;
4110
4111         self->cstate = NULL;            /* will be set later */
4112         self->processed = 0;
4113
4114         return (DestReceiver *) self;
4115 }