granicus.if.org Git - postgresql/blob - src/backend/commands/copy.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * copy.c
   4  *              Implements the COPY utility command
   5  *
   6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.280 2007/04/16 01:14:55 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18 #include <unistd.h>
  19 #include <sys/stat.h>
  20 #include <netinet/in.h>
  21 #include <arpa/inet.h>
  22
  23 #include "access/heapam.h"
  24 #include "access/xact.h"
  25 #include "catalog/namespace.h"
  26 #include "catalog/pg_type.h"
  27 #include "commands/copy.h"
  28 #include "commands/trigger.h"
  29 #include "executor/executor.h"
  30 #include "libpq/libpq.h"
  31 #include "libpq/pqformat.h"
  32 #include "mb/pg_wchar.h"
  33 #include "miscadmin.h"
  34 #include "optimizer/planner.h"
  35 #include "parser/parse_relation.h"
  36 #include "rewrite/rewriteHandler.h"
  37 #include "storage/fd.h"
  38 #include "tcop/tcopprot.h"
  39 #include "utils/acl.h"
  40 #include "utils/builtins.h"
  41 #include "utils/lsyscache.h"
  42 #include "utils/memutils.h"
  43
  44
  45 #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
  46 #define OCTVALUE(c) ((c) - '0')
  47
  48 /*
  49  * Represents the different source/dest cases we need to worry about at
  50  * the bottom level
  51  */
  52 typedef enum CopyDest
  53 {
  54         COPY_FILE,                                      /* to/from file */
  55         COPY_OLD_FE,                            /* to/from frontend (2.0 protocol) */
  56         COPY_NEW_FE                                     /* to/from frontend (3.0 protocol) */
  57 } CopyDest;
  58
  59 /*
  60  *      Represents the end-of-line terminator type of the input
  61  */
  62 typedef enum EolType
  63 {
  64         EOL_UNKNOWN,
  65         EOL_NL,
  66         EOL_CR,
  67         EOL_CRNL
  68 } EolType;
  69
  70 /*
  71  * This struct contains all the state variables used throughout a COPY
  72  * operation. For simplicity, we use the same struct for all variants of COPY,
  73  * even though some fields are used in only some cases.
  74  *
  75  * Multi-byte encodings: all supported client-side encodings encode multi-byte
  76  * characters by having the first byte's high bit set. Subsequent bytes of the
  77  * character can have the high bit not set. When scanning data in such an
  78  * encoding to look for a match to a single-byte (ie ASCII) character, we must
  79  * use the full pg_encoding_mblen() machinery to skip over multibyte
  80  * characters, else we might find a false match to a trailing byte. In
  81  * supported server encodings, there is no possibility of a false match, and
  82  * it's faster to make useless comparisons to trailing bytes than it is to
  83  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
  84  * when we have to do it the hard way.
  85  */
  86 typedef struct CopyStateData
  87 {
  88         /* low-level state data */
  89         CopyDest        copy_dest;              /* type of copy source/destination */
  90         FILE       *copy_file;          /* used if copy_dest == COPY_FILE */
  91         StringInfo      fe_msgbuf;              /* used for all dests during COPY TO, only for
  92                                                                  * dest == COPY_NEW_FE in COPY FROM */
  93         bool            fe_copy;                /* true for all FE copy dests */
  94         bool            fe_eof;                 /* true if detected end of copy data */
  95         EolType         eol_type;               /* EOL type of input */
  96         int                     client_encoding;        /* remote side's character encoding */
  97         bool            need_transcoding;               /* client encoding diff from server? */
  98         bool            encoding_embeds_ascii;  /* ASCII can be non-first byte? */
  99         uint64          processed;              /* # of tuples processed */
 100
 101         /* parameters from the COPY command */
 102         Relation        rel;                    /* relation to copy to or from */
 103         QueryDesc  *queryDesc;          /* executable query to copy from */
 104         List       *attnumlist;         /* integer list of attnums to copy */
 105         char       *filename;           /* filename, or NULL for STDIN/STDOUT */
 106         bool            binary;                 /* binary format? */
 107         bool            oids;                   /* include OIDs? */
 108         bool            csv_mode;               /* Comma Separated Value format? */
 109         bool            header_line;    /* CSV header line? */
 110         char       *null_print;         /* NULL marker string (server encoding!) */
 111         int                     null_print_len; /* length of same */
 112         char       *null_print_client;          /* same converted to client encoding */
 113         char       *delim;                      /* column delimiter (must be 1 byte) */
 114         char       *quote;                      /* CSV quote char (must be 1 byte) */
 115         char       *escape;                     /* CSV escape char (must be 1 byte) */
 116         bool       *force_quote_flags;          /* per-column CSV FQ flags */
 117         bool       *force_notnull_flags;        /* per-column CSV FNN flags */
 118
 119         /* these are just for error messages, see copy_in_error_callback */
 120         const char *cur_relname;        /* table name for error messages */
 121         int                     cur_lineno;             /* line number for error messages */
 122         const char *cur_attname;        /* current att for error messages */
 123         const char *cur_attval;         /* current att value for error messages */
 124
 125         /*
 126          * Working state for COPY TO
 127          */
 128         FmgrInfo   *out_functions;      /* lookup info for output functions */
 129         MemoryContext rowcontext;       /* per-row evaluation context */
 130
 131         /*
 132          * These variables are used to reduce overhead in textual COPY FROM.
 133          *
 134          * attribute_buf holds the separated, de-escaped text for each field of
 135          * the current line.  The CopyReadAttributes functions return arrays of
 136          * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
 137          * the buffer on each cycle.
 138          */
 139         StringInfoData attribute_buf;
 140
 141         /*
 142          * Similarly, line_buf holds the whole input line being processed. The
 143          * input cycle is first to read the whole line into line_buf, convert it
 144          * to server encoding there, and then extract the individual attribute
 145          * fields into attribute_buf.  line_buf is preserved unmodified so that we
 146          * can display it in error messages if appropriate.
 147          */
 148         StringInfoData line_buf;
 149         bool            line_buf_converted;             /* converted to server encoding? */
 150
 151         /*
 152          * Finally, raw_buf holds raw data read from the data source (file or
 153          * client connection).  CopyReadLine parses this data sufficiently to
 154          * locate line boundaries, then transfers the data to line_buf and
 155          * converts it.  Note: we guarantee that there is a \0 at
 156          * raw_buf[raw_buf_len].
 157          */
 158 #define RAW_BUF_SIZE 65536              /* we palloc RAW_BUF_SIZE+1 bytes */
 159         char       *raw_buf;
 160         int                     raw_buf_index;  /* next byte to process */
 161         int                     raw_buf_len;    /* total # of bytes stored */
 162 } CopyStateData;
 163
 164 typedef CopyStateData *CopyState;
 165
 166 /* DestReceiver for COPY (SELECT) TO */
 167 typedef struct
 168 {
 169         DestReceiver pub;                       /* publicly-known function pointers */
 170         CopyState       cstate;                 /* CopyStateData for the command */
 171 } DR_copy;
 172
 173
 174 /*
 175  * These macros centralize code used to process line_buf and raw_buf buffers.
 176  * They are macros because they often do continue/break control and to avoid
 177  * function call overhead in tight COPY loops.
 178  *
 179  * We must use "if (1)" because "do {} while(0)" overrides the continue/break
 180  * processing.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
 181  */
 182
 183 /*
 184  * This keeps the character read at the top of the loop in the buffer
 185  * even if there is more than one read-ahead.
 186  */
 187 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
 188 if (1) \
 189 { \
 190         if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
 191         { \
 192                 raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
 193                 need_data = true; \
 194                 continue; \
 195         } \
 196 } else
 197
 198
 199 /* This consumes the remainder of the buffer and breaks */
 200 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
 201 if (1) \
 202 { \
 203         if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
 204         { \
 205                 if (extralen) \
 206                         raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
 207                 /* backslash just before EOF, treat as data char */ \
 208                 result = true; \
 209                 break; \
 210         } \
 211 } else
 212
 213
 214 /*
 215  * Transfer any approved data to line_buf; must do this to be sure
 216  * there is some room in raw_buf.
 217  */
 218 #define REFILL_LINEBUF \
 219 if (1) \
 220 { \
 221         if (raw_buf_ptr > cstate->raw_buf_index) \
 222         { \
 223                 appendBinaryStringInfo(&cstate->line_buf, \
 224                                                          cstate->raw_buf + cstate->raw_buf_index, \
 225                                                            raw_buf_ptr - cstate->raw_buf_index); \
 226                 cstate->raw_buf_index = raw_buf_ptr; \
 227         } \
 228 } else
 229
 230 /* Undo any read-ahead and jump out of the block. */
 231 #define NO_END_OF_COPY_GOTO \
 232 if (1) \
 233 { \
 234         raw_buf_ptr = prev_raw_ptr + 1; \
 235         goto not_end_of_copy; \
 236 } else
 237
 238
 239 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 240
 241
 242 /* non-export function prototypes */
 243 static void DoCopyTo(CopyState cstate);
 244 static void CopyTo(CopyState cstate);
 245 static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
 246                          Datum *values, bool *nulls);
 247 static void CopyFrom(CopyState cstate);
 248 static bool CopyReadLine(CopyState cstate);
 249 static bool CopyReadLineText(CopyState cstate);
 250 static int CopyReadAttributesText(CopyState cstate, int maxfields,
 251                                            char **fieldvals);
 252 static int CopyReadAttributesCSV(CopyState cstate, int maxfields,
 253                                           char **fieldvals);
 254 static Datum CopyReadBinaryAttribute(CopyState cstate,
 255                                                 int column_no, FmgrInfo *flinfo,
 256                                                 Oid typioparam, int32 typmod,
 257                                                 bool *isnull);
 258 static void CopyAttributeOutText(CopyState cstate, char *string);
 259 static void CopyAttributeOutCSV(CopyState cstate, char *string,
 260                                         bool use_quote, bool single_attr);
 261 static List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
 262                            List *attnamelist);
 263 static char *limit_printout_length(const char *str);
 264
 265 /* Low-level communications functions */
 266 static void SendCopyBegin(CopyState cstate);
 267 static void ReceiveCopyBegin(CopyState cstate);
 268 static void SendCopyEnd(CopyState cstate);
 269 static void CopySendData(CopyState cstate, void *databuf, int datasize);
 270 static void CopySendString(CopyState cstate, const char *str);
 271 static void CopySendChar(CopyState cstate, char c);
 272 static void CopySendEndOfRow(CopyState cstate);
 273 static int CopyGetData(CopyState cstate, void *databuf,
 274                         int minread, int maxread);
 275 static void CopySendInt32(CopyState cstate, int32 val);
 276 static bool CopyGetInt32(CopyState cstate, int32 *val);
 277 static void CopySendInt16(CopyState cstate, int16 val);
 278 static bool CopyGetInt16(CopyState cstate, int16 *val);
 279
 280
 281 /*
 282  * Send copy start/stop messages for frontend copies.  These have changed
 283  * in past protocol redesigns.
 284  */
 285 static void
 286 SendCopyBegin(CopyState cstate)
 287 {
 288         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
 289         {
 290                 /* new way */
 291                 StringInfoData buf;
 292                 int                     natts = list_length(cstate->attnumlist);
 293                 int16           format = (cstate->binary ? 1 : 0);
 294                 int                     i;
 295
 296                 pq_beginmessage(&buf, 'H');
 297                 pq_sendbyte(&buf, format);              /* overall format */
 298                 pq_sendint(&buf, natts, 2);
 299                 for (i = 0; i < natts; i++)
 300                         pq_sendint(&buf, format, 2);            /* per-column formats */
 301                 pq_endmessage(&buf);
 302                 cstate->copy_dest = COPY_NEW_FE;
 303         }
 304         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 305         {
 306                 /* old way */
 307                 if (cstate->binary)
 308                         ereport(ERROR,
 309                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 310                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 311                 pq_putemptymessage('H');
 312                 /* grottiness needed for old COPY OUT protocol */
 313                 pq_startcopyout();
 314                 cstate->copy_dest = COPY_OLD_FE;
 315         }
 316         else
 317         {
 318                 /* very old way */
 319                 if (cstate->binary)
 320                         ereport(ERROR,
 321                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 322                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 323                 pq_putemptymessage('B');
 324                 /* grottiness needed for old COPY OUT protocol */
 325                 pq_startcopyout();
 326                 cstate->copy_dest = COPY_OLD_FE;
 327         }
 328 }
 329
 330 static void
 331 ReceiveCopyBegin(CopyState cstate)
 332 {
 333         if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
 334         {
 335                 /* new way */
 336                 StringInfoData buf;
 337                 int                     natts = list_length(cstate->attnumlist);
 338                 int16           format = (cstate->binary ? 1 : 0);
 339                 int                     i;
 340
 341                 pq_beginmessage(&buf, 'G');
 342                 pq_sendbyte(&buf, format);              /* overall format */
 343                 pq_sendint(&buf, natts, 2);
 344                 for (i = 0; i < natts; i++)
 345                         pq_sendint(&buf, format, 2);            /* per-column formats */
 346                 pq_endmessage(&buf);
 347                 cstate->copy_dest = COPY_NEW_FE;
 348                 cstate->fe_msgbuf = makeStringInfo();
 349         }
 350         else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 351         {
 352                 /* old way */
 353                 if (cstate->binary)
 354                         ereport(ERROR,
 355                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 356                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 357                 pq_putemptymessage('G');
 358                 cstate->copy_dest = COPY_OLD_FE;
 359         }
 360         else
 361         {
 362                 /* very old way */
 363                 if (cstate->binary)
 364                         ereport(ERROR,
 365                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 366                         errmsg("COPY BINARY is not supported to stdout or from stdin")));
 367                 pq_putemptymessage('D');
 368                 cstate->copy_dest = COPY_OLD_FE;
 369         }
 370         /* We *must* flush here to ensure FE knows it can send. */
 371         pq_flush();
 372 }
 373
 374 static void
 375 SendCopyEnd(CopyState cstate)
 376 {
 377         if (cstate->copy_dest == COPY_NEW_FE)
 378         {
 379                 /* Shouldn't have any unsent data */
 380                 Assert(cstate->fe_msgbuf->len == 0);
 381                 /* Send Copy Done message */
 382                 pq_putemptymessage('c');
 383         }
 384         else
 385         {
 386                 CopySendData(cstate, "\\.", 2);
 387                 /* Need to flush out the trailer (this also appends a newline) */
 388                 CopySendEndOfRow(cstate);
 389                 pq_endcopyout(false);
 390         }
 391 }
 392
 393 /*----------
 394  * CopySendData sends output data to the destination (file or frontend)
 395  * CopySendString does the same for null-terminated strings
 396  * CopySendChar does the same for single characters
 397  * CopySendEndOfRow does the appropriate thing at end of each data row
 398  *      (data is not actually flushed except by CopySendEndOfRow)
 399  *
 400  * NB: no data conversion is applied by these functions
 401  *----------
 402  */
 403 static void
 404 CopySendData(CopyState cstate, void *databuf, int datasize)
 405 {
 406         appendBinaryStringInfo(cstate->fe_msgbuf, (char *) databuf, datasize);
 407 }
 408
 409 static void
 410 CopySendString(CopyState cstate, const char *str)
 411 {
 412         appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
 413 }
 414
 415 static void
 416 CopySendChar(CopyState cstate, char c)
 417 {
 418         appendStringInfoCharMacro(cstate->fe_msgbuf, c);
 419 }
 420
 421 static void
 422 CopySendEndOfRow(CopyState cstate)
 423 {
 424         StringInfo      fe_msgbuf = cstate->fe_msgbuf;
 425
 426         switch (cstate->copy_dest)
 427         {
 428                 case COPY_FILE:
 429                         if (!cstate->binary)
 430                         {
 431                                 /* Default line termination depends on platform */
 432 #ifndef WIN32
 433                                 CopySendChar(cstate, '\n');
 434 #else
 435                                 CopySendString(cstate, "\r\n");
 436 #endif
 437                         }
 438
 439                         (void) fwrite(fe_msgbuf->data, fe_msgbuf->len,
 440                                                   1, cstate->copy_file);
 441                         if (ferror(cstate->copy_file))
 442                                 ereport(ERROR,
 443                                                 (errcode_for_file_access(),
 444                                                  errmsg("could not write to COPY file: %m")));
 445                         break;
 446                 case COPY_OLD_FE:
 447                         /* The FE/BE protocol uses \n as newline for all platforms */
 448                         if (!cstate->binary)
 449                                 CopySendChar(cstate, '\n');
 450
 451                         if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
 452                         {
 453                                 /* no hope of recovering connection sync, so FATAL */
 454                                 ereport(FATAL,
 455                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 456                                                  errmsg("connection lost during COPY to stdout")));
 457                         }
 458                         break;
 459                 case COPY_NEW_FE:
 460                         /* The FE/BE protocol uses \n as newline for all platforms */
 461                         if (!cstate->binary)
 462                                 CopySendChar(cstate, '\n');
 463
 464                         /* Dump the accumulated row as one CopyData message */
 465                         (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
 466                         break;
 467         }
 468
 469         resetStringInfo(fe_msgbuf);
 470 }
 471
 472 /*
 473  * CopyGetData reads data from the source (file or frontend)
 474  *
 475  * We attempt to read at least minread, and at most maxread, bytes from
 476  * the source.  The actual number of bytes read is returned; if this is
 477  * less than minread, EOF was detected.
 478  *
 479  * Note: when copying from the frontend, we expect a proper EOF mark per
 480  * protocol; if the frontend simply drops the connection, we raise error.
 481  * It seems unwise to allow the COPY IN to complete normally in that case.
 482  *
 483  * NB: no data conversion is applied here.
 484  */
 485 static int
 486 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
 487 {
 488         int                     bytesread = 0;
 489
 490         switch (cstate->copy_dest)
 491         {
 492                 case COPY_FILE:
 493                         bytesread = fread(databuf, 1, maxread, cstate->copy_file);
 494                         if (ferror(cstate->copy_file))
 495                                 ereport(ERROR,
 496                                                 (errcode_for_file_access(),
 497                                                  errmsg("could not read from COPY file: %m")));
 498                         break;
 499                 case COPY_OLD_FE:
 500
 501                         /*
 502                          * We cannot read more than minread bytes (which in practice is 1)
 503                          * because old protocol doesn't have any clear way of separating
 504                          * the COPY stream from following data.  This is slow, but not any
 505                          * slower than the code path was originally, and we don't care
 506                          * much anymore about the performance of old protocol.
 507                          */
 508                         if (pq_getbytes((char *) databuf, minread))
 509                         {
 510                                 /* Only a \. terminator is legal EOF in old protocol */
 511                                 ereport(ERROR,
 512                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 513                                                  errmsg("unexpected EOF on client connection")));
 514                         }
 515                         bytesread = minread;
 516                         break;
 517                 case COPY_NEW_FE:
 518                         while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
 519                         {
 520                                 int                     avail;
 521
 522                                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
 523                                 {
 524                                         /* Try to receive another message */
 525                                         int                     mtype;
 526
 527                         readmessage:
 528                                         mtype = pq_getbyte();
 529                                         if (mtype == EOF)
 530                                                 ereport(ERROR,
 531                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 532                                                          errmsg("unexpected EOF on client connection")));
 533                                         if (pq_getmessage(cstate->fe_msgbuf, 0))
 534                                                 ereport(ERROR,
 535                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
 536                                                          errmsg("unexpected EOF on client connection")));
 537                                         switch (mtype)
 538                                         {
 539                                                 case 'd':               /* CopyData */
 540                                                         break;
 541                                                 case 'c':               /* CopyDone */
 542                                                         /* COPY IN correctly terminated by frontend */
 543                                                         cstate->fe_eof = true;
 544                                                         return bytesread;
 545                                                 case 'f':               /* CopyFail */
 546                                                         ereport(ERROR,
 547                                                                         (errcode(ERRCODE_QUERY_CANCELED),
 548                                                                          errmsg("COPY from stdin failed: %s",
 549                                                                            pq_getmsgstring(cstate->fe_msgbuf))));
 550                                                         break;
 551                                                 case 'H':               /* Flush */
 552                                                 case 'S':               /* Sync */
 553
 554                                                         /*
 555                                                          * Ignore Flush/Sync for the convenience of client
 556                                                          * libraries (such as libpq) that may send those
 557                                                          * without noticing that the command they just
 558                                                          * sent was COPY.
 559                                                          */
 560                                                         goto readmessage;
 561                                                 default:
 562                                                         ereport(ERROR,
 563                                                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
 564                                                                          errmsg("unexpected message type 0x%02X during COPY from stdin",
 565                                                                                         mtype)));
 566                                                         break;
 567                                         }
 568                                 }
 569                                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
 570                                 if (avail > maxread)
 571                                         avail = maxread;
 572                                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
 573                                 databuf = (void *) ((char *) databuf + avail);
 574                                 maxread -= avail;
 575                                 bytesread += avail;
 576                         }
 577                         break;
 578         }
 579
 580         return bytesread;
 581 }
 582
 583
 584 /*
 585  * These functions do apply some data conversion
 586  */
 587
 588 /*
 589  * CopySendInt32 sends an int32 in network byte order
 590  */
 591 static void
 592 CopySendInt32(CopyState cstate, int32 val)
 593 {
 594         uint32          buf;
 595
 596         buf = htonl((uint32) val);
 597         CopySendData(cstate, &buf, sizeof(buf));
 598 }
 599
 600 /*
 601  * CopyGetInt32 reads an int32 that appears in network byte order
 602  *
 603  * Returns true if OK, false if EOF
 604  */
 605 static bool
 606 CopyGetInt32(CopyState cstate, int32 *val)
 607 {
 608         uint32          buf;
 609
 610         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
 611         {
 612                 *val = 0;                               /* suppress compiler warning */
 613                 return false;
 614         }
 615         *val = (int32) ntohl(buf);
 616         return true;
 617 }
 618
 619 /*
 620  * CopySendInt16 sends an int16 in network byte order
 621  */
 622 static void
 623 CopySendInt16(CopyState cstate, int16 val)
 624 {
 625         uint16          buf;
 626
 627         buf = htons((uint16) val);
 628         CopySendData(cstate, &buf, sizeof(buf));
 629 }
 630
 631 /*
 632  * CopyGetInt16 reads an int16 that appears in network byte order
 633  */
 634 static bool
 635 CopyGetInt16(CopyState cstate, int16 *val)
 636 {
 637         uint16          buf;
 638
 639         if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
 640         {
 641                 *val = 0;                               /* suppress compiler warning */
 642                 return false;
 643         }
 644         *val = (int16) ntohs(buf);
 645         return true;
 646 }
 647
 648
 649 /*
 650  * CopyLoadRawBuf loads some more data into raw_buf
 651  *
 652  * Returns TRUE if able to obtain at least one more byte, else FALSE.
 653  *
 654  * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
 655  * down to the start of the buffer and then we load more data after that.
 656  * This case is used only when a frontend multibyte character crosses a
 657  * bufferload boundary.
 658  */
 659 static bool
 660 CopyLoadRawBuf(CopyState cstate)
 661 {
 662         int                     nbytes;
 663         int                     inbytes;
 664
 665         if (cstate->raw_buf_index < cstate->raw_buf_len)
 666         {
 667                 /* Copy down the unprocessed data */
 668                 nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
 669                 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
 670                                 nbytes);
 671         }
 672         else
 673                 nbytes = 0;                             /* no data need be saved */
 674
 675         inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
 676                                                   1, RAW_BUF_SIZE - nbytes);
 677         nbytes += inbytes;
 678         cstate->raw_buf[nbytes] = '\0';
 679         cstate->raw_buf_index = 0;
 680         cstate->raw_buf_len = nbytes;
 681         return (inbytes > 0);
 682 }
 683
 684
 685 /*
 686  *       DoCopy executes the SQL COPY statement
 687  *
 688  * Either unload or reload contents of table <relation>, depending on <from>.
 689  * (<from> = TRUE means we are inserting into the table.)  In the "TO" case
 690  * we also support copying the output of an arbitrary SELECT query.
 691  *
 692  * If <pipe> is false, transfer is between the table and the file named
 693  * <filename>.  Otherwise, transfer is between the table and our regular
 694  * input/output stream. The latter could be either stdin/stdout or a
 695  * socket, depending on whether we're running under Postmaster control.
 696  *
 697  * Iff <binary>, unload or reload in the binary format, as opposed to the
 698  * more wasteful but more robust and portable text format.
 699  *
 700  * Iff <oids>, unload or reload the format that includes OID information.
 701  * On input, we accept OIDs whether or not the table has an OID column,
 702  * but silently drop them if it does not.  On output, we report an error
 703  * if the user asks for OIDs in a table that has none (not providing an
 704  * OID column might seem friendlier, but could seriously confuse programs).
 705  *
 706  * If in the text format, delimit columns with delimiter <delim> and print
 707  * NULL values as <null_print>.
 708  *
 709  * Do not allow a Postgres user without superuser privilege to read from
 710  * or write to a file.
 711  *
 712  * Do not allow the copy if user doesn't have proper permission to access
 713  * the table.
 714  */
 715 uint64
 716 DoCopy(const CopyStmt *stmt, const char *queryString)
 717 {
 718         CopyState       cstate;
 719         bool            is_from = stmt->is_from;
 720         bool            pipe = (stmt->filename == NULL);
 721         List       *attnamelist = stmt->attlist;
 722         List       *force_quote = NIL;
 723         List       *force_notnull = NIL;
 724         AclMode         required_access = (is_from ? ACL_INSERT : ACL_SELECT);
 725         AclResult       aclresult;
 726         ListCell   *option;
 727         TupleDesc       tupDesc;
 728         int                     num_phys_attrs;
 729         uint64          processed;
 730
 731         /* Allocate workspace and zero all fields */
 732         cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
 733
 734         /* Extract options from the statement node tree */
 735         foreach(option, stmt->options)
 736         {
 737                 DefElem    *defel = (DefElem *) lfirst(option);
 738
 739                 if (strcmp(defel->defname, "binary") == 0)
 740                 {
 741                         if (cstate->binary)
 742                                 ereport(ERROR,
 743                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 744                                                  errmsg("conflicting or redundant options")));
 745                         cstate->binary = intVal(defel->arg);
 746                 }
 747                 else if (strcmp(defel->defname, "oids") == 0)
 748                 {
 749                         if (cstate->oids)
 750                                 ereport(ERROR,
 751                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 752                                                  errmsg("conflicting or redundant options")));
 753                         cstate->oids = intVal(defel->arg);
 754                 }
 755                 else if (strcmp(defel->defname, "delimiter") == 0)
 756                 {
 757                         if (cstate->delim)
 758                                 ereport(ERROR,
 759                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 760                                                  errmsg("conflicting or redundant options")));
 761                         cstate->delim = strVal(defel->arg);
 762                 }
 763                 else if (strcmp(defel->defname, "null") == 0)
 764                 {
 765                         if (cstate->null_print)
 766                                 ereport(ERROR,
 767                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 768                                                  errmsg("conflicting or redundant options")));
 769                         cstate->null_print = strVal(defel->arg);
 770                 }
 771                 else if (strcmp(defel->defname, "csv") == 0)
 772                 {
 773                         if (cstate->csv_mode)
 774                                 ereport(ERROR,
 775                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 776                                                  errmsg("conflicting or redundant options")));
 777                         cstate->csv_mode = intVal(defel->arg);
 778                 }
 779                 else if (strcmp(defel->defname, "header") == 0)
 780                 {
 781                         if (cstate->header_line)
 782                                 ereport(ERROR,
 783                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 784                                                  errmsg("conflicting or redundant options")));
 785                         cstate->header_line = intVal(defel->arg);
 786                 }
 787                 else if (strcmp(defel->defname, "quote") == 0)
 788                 {
 789                         if (cstate->quote)
 790                                 ereport(ERROR,
 791                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 792                                                  errmsg("conflicting or redundant options")));
 793                         cstate->quote = strVal(defel->arg);
 794                 }
 795                 else if (strcmp(defel->defname, "escape") == 0)
 796                 {
 797                         if (cstate->escape)
 798                                 ereport(ERROR,
 799                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 800                                                  errmsg("conflicting or redundant options")));
 801                         cstate->escape = strVal(defel->arg);
 802                 }
 803                 else if (strcmp(defel->defname, "force_quote") == 0)
 804                 {
 805                         if (force_quote)
 806                                 ereport(ERROR,
 807                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 808                                                  errmsg("conflicting or redundant options")));
 809                         force_quote = (List *) defel->arg;
 810                 }
 811                 else if (strcmp(defel->defname, "force_notnull") == 0)
 812                 {
 813                         if (force_notnull)
 814                                 ereport(ERROR,
 815                                                 (errcode(ERRCODE_SYNTAX_ERROR),
 816                                                  errmsg("conflicting or redundant options")));
 817                         force_notnull = (List *) defel->arg;
 818                 }
 819                 else
 820                         elog(ERROR, "option \"%s\" not recognized",
 821                                  defel->defname);
 822         }
 823
 824         /* Check for incompatible options */
 825         if (cstate->binary && cstate->delim)
 826                 ereport(ERROR,
 827                                 (errcode(ERRCODE_SYNTAX_ERROR),
 828                                  errmsg("cannot specify DELIMITER in BINARY mode")));
 829
 830         if (cstate->binary && cstate->csv_mode)
 831                 ereport(ERROR,
 832                                 (errcode(ERRCODE_SYNTAX_ERROR),
 833                                  errmsg("cannot specify CSV in BINARY mode")));
 834
 835         if (cstate->binary && cstate->null_print)
 836                 ereport(ERROR,
 837                                 (errcode(ERRCODE_SYNTAX_ERROR),
 838                                  errmsg("cannot specify NULL in BINARY mode")));
 839
 840         /* Set defaults for omitted options */
 841         if (!cstate->delim)
 842                 cstate->delim = cstate->csv_mode ? "," : "\t";
 843
 844         if (!cstate->null_print)
 845                 cstate->null_print = cstate->csv_mode ? "" : "\\N";
 846         cstate->null_print_len = strlen(cstate->null_print);
 847
 848         if (cstate->csv_mode)
 849         {
 850                 if (!cstate->quote)
 851                         cstate->quote = "\"";
 852                 if (!cstate->escape)
 853                         cstate->escape = cstate->quote;
 854         }
 855
 856         /* Only single-character delimiter strings are supported. */
 857         if (strlen(cstate->delim) != 1)
 858                 ereport(ERROR,
 859                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 860                                  errmsg("COPY delimiter must be a single character")));
 861
 862         /* Disallow end-of-line characters */
 863         if (strchr(cstate->delim, '\r') != NULL ||
 864                 strchr(cstate->delim, '\n') != NULL)
 865                 ereport(ERROR,
 866                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 867                          errmsg("COPY delimiter cannot be newline or carriage return")));
 868
 869         if (strchr(cstate->null_print, '\r') != NULL ||
 870                 strchr(cstate->null_print, '\n') != NULL)
 871                 ereport(ERROR,
 872                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 873                                  errmsg("COPY null representation cannot use newline or carriage return")));
 874
 875         /* Disallow backslash in non-CSV mode */
 876         if (!cstate->csv_mode && strchr(cstate->delim, '\\') != NULL)
 877                 ereport(ERROR,
 878                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 879                                  errmsg("COPY delimiter cannot be backslash")));
 880
 881         /* Check header */
 882         if (!cstate->csv_mode && cstate->header_line)
 883                 ereport(ERROR,
 884                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 885                                  errmsg("COPY HEADER available only in CSV mode")));
 886
 887         /* Check quote */
 888         if (!cstate->csv_mode && cstate->quote != NULL)
 889                 ereport(ERROR,
 890                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 891                                  errmsg("COPY quote available only in CSV mode")));
 892
 893         if (cstate->csv_mode && strlen(cstate->quote) != 1)
 894                 ereport(ERROR,
 895                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 896                                  errmsg("COPY quote must be a single character")));
 897
 898         /* Check escape */
 899         if (!cstate->csv_mode && cstate->escape != NULL)
 900                 ereport(ERROR,
 901                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 902                                  errmsg("COPY escape available only in CSV mode")));
 903
 904         if (cstate->csv_mode && strlen(cstate->escape) != 1)
 905                 ereport(ERROR,
 906                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 907                                  errmsg("COPY escape must be a single character")));
 908
 909         /* Check force_quote */
 910         if (!cstate->csv_mode && force_quote != NIL)
 911                 ereport(ERROR,
 912                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 913                                  errmsg("COPY force quote available only in CSV mode")));
 914         if (force_quote != NIL && is_from)
 915                 ereport(ERROR,
 916                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 917                                  errmsg("COPY force quote only available using COPY TO")));
 918
 919         /* Check force_notnull */
 920         if (!cstate->csv_mode && force_notnull != NIL)
 921                 ereport(ERROR,
 922                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 923                                  errmsg("COPY force not null available only in CSV mode")));
 924         if (force_notnull != NIL && !is_from)
 925                 ereport(ERROR,
 926                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 927                           errmsg("COPY force not null only available using COPY FROM")));
 928
 929         /* Don't allow the delimiter to appear in the null string. */
 930         if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
 931                 ereport(ERROR,
 932                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 933                 errmsg("COPY delimiter must not appear in the NULL specification")));
 934
 935         /* Don't allow the CSV quote char to appear in the null string. */
 936         if (cstate->csv_mode &&
 937                 strchr(cstate->null_print, cstate->quote[0]) != NULL)
 938                 ereport(ERROR,
 939                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 940                                  errmsg("CSV quote character must not appear in the NULL specification")));
 941
 942         /* Disallow file COPY except to superusers. */
 943         if (!pipe && !superuser())
 944                 ereport(ERROR,
 945                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 946                                  errmsg("must be superuser to COPY to or from a file"),
 947                                  errhint("Anyone can COPY to stdout or from stdin. "
 948                                                  "psql's \\copy command also works for anyone.")));
 949
 950         if (stmt->relation)
 951         {
 952                 Assert(!stmt->query);
 953                 cstate->queryDesc = NULL;
 954
 955                 /* Open and lock the relation, using the appropriate lock type. */
 956                 cstate->rel = heap_openrv(stmt->relation,
 957                                                          (is_from ? RowExclusiveLock : AccessShareLock));
 958
 959                 /* Check relation permissions. */
 960                 aclresult = pg_class_aclcheck(RelationGetRelid(cstate->rel),
 961                                                                           GetUserId(),
 962                                                                           required_access);
 963                 if (aclresult != ACLCHECK_OK)
 964                         aclcheck_error(aclresult, ACL_KIND_CLASS,
 965                                                    RelationGetRelationName(cstate->rel));
 966
 967                 /* check read-only transaction */
 968                 if (XactReadOnly && is_from &&
 969                         !isTempNamespace(RelationGetNamespace(cstate->rel)))
 970                         ereport(ERROR,
 971                                         (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
 972                                          errmsg("transaction is read-only")));
 973
 974                 /* Don't allow COPY w/ OIDs to or from a table without them */
 975                 if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
 976                         ereport(ERROR,
 977                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 978                                          errmsg("table \"%s\" does not have OIDs",
 979                                                         RelationGetRelationName(cstate->rel))));
 980
 981                 tupDesc = RelationGetDescr(cstate->rel);
 982         }
 983         else
 984         {
 985                 List       *rewritten;
 986                 Query      *query;
 987                 PlannedStmt *plan;
 988                 DestReceiver *dest;
 989
 990                 Assert(!is_from);
 991                 cstate->rel = NULL;
 992
 993                 /* Don't allow COPY w/ OIDs from a select */
 994                 if (cstate->oids)
 995                         ereport(ERROR,
 996                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 997                                          errmsg("COPY (SELECT) WITH OIDS is not supported")));
 998
 999                 /*
1000                  * Run parse analysis and rewrite.  Note this also acquires sufficient
1001                  * locks on the source table(s).
1002                  *
1003                  * Because the parser and planner tend to scribble on their input, we
1004                  * make a preliminary copy of the source querytree.  This prevents
1005                  * problems in the case that the COPY is in a portal or plpgsql
1006                  * function and is executed repeatedly.  (See also the same hack in
1007                  * DECLARE CURSOR and PREPARE.)  XXX FIXME someday.
1008                  */
1009                 rewritten = pg_analyze_and_rewrite((Node *) copyObject(stmt->query),
1010                                                                                    queryString, NULL, 0);
1011
1012                 /* We don't expect more or less than one result query */
1013                 if (list_length(rewritten) != 1)
1014                         elog(ERROR, "unexpected rewrite result");
1015
1016                 query = (Query *) linitial(rewritten);
1017                 Assert(query->commandType == CMD_SELECT);
1018
1019                 /* Query mustn't use INTO, either */
1020                 if (query->into)
1021                         ereport(ERROR,
1022                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1023                                          errmsg("COPY (SELECT INTO) is not supported")));
1024
1025                 /* plan the query */
1026                 plan = planner(query, 0, NULL);
1027
1028                 /*
1029                  * Update snapshot command ID to ensure this query sees results of any
1030                  * previously executed queries.  (It's a bit cheesy to modify
1031                  * ActiveSnapshot without making a copy, but for the limited ways in
1032                  * which COPY can be invoked, I think it's OK, because the active
1033                  * snapshot shouldn't be shared with anything else anyway.)
1034                  */
1035                 ActiveSnapshot->curcid = GetCurrentCommandId();
1036
1037                 /* Create dest receiver for COPY OUT */
1038                 dest = CreateDestReceiver(DestCopyOut, NULL);
1039                 ((DR_copy *) dest)->cstate = cstate;
1040
1041                 /* Create a QueryDesc requesting no output */
1042                 cstate->queryDesc = CreateQueryDesc(plan,
1043                                                                                         ActiveSnapshot, InvalidSnapshot,
1044                                                                                         dest, NULL, false);
1045
1046                 /*
1047                  * Call ExecutorStart to prepare the plan for execution.
1048                  *
1049                  * ExecutorStart computes a result tupdesc for us
1050                  */
1051                 ExecutorStart(cstate->queryDesc, 0);
1052
1053                 tupDesc = cstate->queryDesc->tupDesc;
1054         }
1055
1056         /* Generate or convert list of attributes to process */
1057         cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1058
1059         num_phys_attrs = tupDesc->natts;
1060
1061         /* Convert FORCE QUOTE name list to per-column flags, check validity */
1062         cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1063         if (force_quote)
1064         {
1065                 List       *attnums;
1066                 ListCell   *cur;
1067
1068                 attnums = CopyGetAttnums(tupDesc, cstate->rel, force_quote);
1069
1070                 foreach(cur, attnums)
1071                 {
1072                         int                     attnum = lfirst_int(cur);
1073
1074                         if (!list_member_int(cstate->attnumlist, attnum))
1075                                 ereport(ERROR,
1076                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1077                                    errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
1078                                                   NameStr(tupDesc->attrs[attnum - 1]->attname))));
1079                         cstate->force_quote_flags[attnum - 1] = true;
1080                 }
1081         }
1082
1083         /* Convert FORCE NOT NULL name list to per-column flags, check validity */
1084         cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1085         if (force_notnull)
1086         {
1087                 List       *attnums;
1088                 ListCell   *cur;
1089
1090                 attnums = CopyGetAttnums(tupDesc, cstate->rel, force_notnull);
1091
1092                 foreach(cur, attnums)
1093                 {
1094                         int                     attnum = lfirst_int(cur);
1095
1096                         if (!list_member_int(cstate->attnumlist, attnum))
1097                                 ereport(ERROR,
1098                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1099                                 errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
1100                                            NameStr(tupDesc->attrs[attnum - 1]->attname))));
1101                         cstate->force_notnull_flags[attnum - 1] = true;
1102                 }
1103         }
1104
1105         /* Set up variables to avoid per-attribute overhead. */
1106         initStringInfo(&cstate->attribute_buf);
1107         initStringInfo(&cstate->line_buf);
1108         cstate->line_buf_converted = false;
1109         cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
1110         cstate->raw_buf_index = cstate->raw_buf_len = 0;
1111         cstate->processed = 0;
1112
1113         /*
1114          * Set up encoding conversion info.  Even if the client and server
1115          * encodings are the same, we must apply pg_client_to_server() to validate
1116          * data in multibyte encodings.
1117          */
1118         cstate->client_encoding = pg_get_client_encoding();
1119         cstate->need_transcoding =
1120                 (cstate->client_encoding != GetDatabaseEncoding() ||
1121                  pg_database_encoding_max_length() > 1);
1122         /* See Multibyte encoding comment above */
1123         cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding);
1124
1125         cstate->copy_dest = COPY_FILE;          /* default */
1126         cstate->filename = stmt->filename;
1127
1128         if (is_from)
1129                 CopyFrom(cstate);               /* copy from file to database */
1130         else
1131                 DoCopyTo(cstate);               /* copy from database to file */
1132
1133         /*
1134          * Close the relation or query.  If reading, we can release the
1135          * AccessShareLock we got; if writing, we should hold the lock until end
1136          * of transaction to ensure that updates will be committed before lock is
1137          * released.
1138          */
1139         if (cstate->rel)
1140                 heap_close(cstate->rel, (is_from ? NoLock : AccessShareLock));
1141         else
1142         {
1143                 /* Close down the query and free resources. */
1144                 ExecutorEnd(cstate->queryDesc);
1145                 FreeQueryDesc(cstate->queryDesc);
1146         }
1147
1148         /* Clean up storage (probably not really necessary) */
1149         processed = cstate->processed;
1150
1151         pfree(cstate->attribute_buf.data);
1152         pfree(cstate->line_buf.data);
1153         pfree(cstate->raw_buf);
1154         pfree(cstate);
1155
1156         return processed;
1157 }
1158
1159
1160 /*
1161  * This intermediate routine exists mainly to localize the effects of setjmp
1162  * so we don't need to plaster a lot of variables with "volatile".
1163  */
1164 static void
1165 DoCopyTo(CopyState cstate)
1166 {
1167         bool            pipe = (cstate->filename == NULL);
1168
1169         if (cstate->rel)
1170         {
1171                 if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
1172                 {
1173                         if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
1174                                 ereport(ERROR,
1175                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1176                                                  errmsg("cannot copy from view \"%s\"",
1177                                                                 RelationGetRelationName(cstate->rel)),
1178                                                  errhint("Try the COPY (SELECT ...) TO variant.")));
1179                         else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
1180                                 ereport(ERROR,
1181                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1182                                                  errmsg("cannot copy from sequence \"%s\"",
1183                                                                 RelationGetRelationName(cstate->rel))));
1184                         else
1185                                 ereport(ERROR,
1186                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1187                                                  errmsg("cannot copy from non-table relation \"%s\"",
1188                                                                 RelationGetRelationName(cstate->rel))));
1189                 }
1190         }
1191
1192         if (pipe)
1193         {
1194                 if (whereToSendOutput == DestRemote)
1195                         cstate->fe_copy = true;
1196                 else
1197                         cstate->copy_file = stdout;
1198         }
1199         else
1200         {
1201                 mode_t          oumask;         /* Pre-existing umask value */
1202                 struct stat st;
1203
1204                 /*
1205                  * Prevent write to relative path ... too easy to shoot oneself in the
1206                  * foot by overwriting a database file ...
1207                  */
1208                 if (!is_absolute_path(cstate->filename))
1209                         ereport(ERROR,
1210                                         (errcode(ERRCODE_INVALID_NAME),
1211                                          errmsg("relative path not allowed for COPY to file")));
1212
1213                 oumask = umask((mode_t) 022);
1214                 cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1215                 umask(oumask);
1216
1217                 if (cstate->copy_file == NULL)
1218                         ereport(ERROR,
1219                                         (errcode_for_file_access(),
1220                                          errmsg("could not open file \"%s\" for writing: %m",
1221                                                         cstate->filename)));
1222
1223                 fstat(fileno(cstate->copy_file), &st);
1224                 if (S_ISDIR(st.st_mode))
1225                 {
1226                         FreeFile(cstate->copy_file);
1227                         ereport(ERROR,
1228                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1229                                          errmsg("\"%s\" is a directory", cstate->filename)));
1230                 }
1231         }
1232
1233         PG_TRY();
1234         {
1235                 if (cstate->fe_copy)
1236                         SendCopyBegin(cstate);
1237
1238                 CopyTo(cstate);
1239
1240                 if (cstate->fe_copy)
1241                         SendCopyEnd(cstate);
1242         }
1243         PG_CATCH();
1244         {
1245                 /*
1246                  * Make sure we turn off old-style COPY OUT mode upon error. It is
1247                  * okay to do this in all cases, since it does nothing if the mode is
1248                  * not on.
1249                  */
1250                 pq_endcopyout(true);
1251                 PG_RE_THROW();
1252         }
1253         PG_END_TRY();
1254
1255         if (!pipe)
1256         {
1257                 if (FreeFile(cstate->copy_file))
1258                         ereport(ERROR,
1259                                         (errcode_for_file_access(),
1260                                          errmsg("could not write to file \"%s\": %m",
1261                                                         cstate->filename)));
1262         }
1263 }
1264
1265 /*
1266  * Copy from relation or query TO file.
1267  */
1268 static void
1269 CopyTo(CopyState cstate)
1270 {
1271         TupleDesc       tupDesc;
1272         int                     num_phys_attrs;
1273         Form_pg_attribute *attr;
1274         ListCell   *cur;
1275
1276         if (cstate->rel)
1277                 tupDesc = RelationGetDescr(cstate->rel);
1278         else
1279                 tupDesc = cstate->queryDesc->tupDesc;
1280         attr = tupDesc->attrs;
1281         num_phys_attrs = tupDesc->natts;
1282         cstate->null_print_client = cstate->null_print;         /* default */
1283
1284         /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1285         cstate->fe_msgbuf = makeStringInfo();
1286
1287         /* Get info about the columns we need to process. */
1288         cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1289         foreach(cur, cstate->attnumlist)
1290         {
1291                 int                     attnum = lfirst_int(cur);
1292                 Oid                     out_func_oid;
1293                 bool            isvarlena;
1294
1295                 if (cstate->binary)
1296                         getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
1297                                                                         &out_func_oid,
1298                                                                         &isvarlena);
1299                 else
1300                         getTypeOutputInfo(attr[attnum - 1]->atttypid,
1301                                                           &out_func_oid,
1302                                                           &isvarlena);
1303                 fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
1304         }
1305
1306         /*
1307          * Create a temporary memory context that we can reset once per row to
1308          * recover palloc'd memory.  This avoids any problems with leaks inside
1309          * datatype output routines, and should be faster than retail pfree's
1310          * anyway.      (We don't need a whole econtext as CopyFrom does.)
1311          */
1312         cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1313                                                                                            "COPY TO",
1314                                                                                            ALLOCSET_DEFAULT_MINSIZE,
1315                                                                                            ALLOCSET_DEFAULT_INITSIZE,
1316                                                                                            ALLOCSET_DEFAULT_MAXSIZE);
1317
1318         if (cstate->binary)
1319         {
1320                 /* Generate header for a binary copy */
1321                 int32           tmp;
1322
1323                 /* Signature */
1324                 CopySendData(cstate, (char *) BinarySignature, 11);
1325                 /* Flags field */
1326                 tmp = 0;
1327                 if (cstate->oids)
1328                         tmp |= (1 << 16);
1329                 CopySendInt32(cstate, tmp);
1330                 /* No header extension */
1331                 tmp = 0;
1332                 CopySendInt32(cstate, tmp);
1333         }
1334         else
1335         {
1336                 /*
1337                  * For non-binary copy, we need to convert null_print to client
1338                  * encoding, because it will be sent directly with CopySendString.
1339                  */
1340                 if (cstate->need_transcoding)
1341                         cstate->null_print_client = pg_server_to_client(cstate->null_print,
1342                                                                                                          cstate->null_print_len);
1343
1344                 /* if a header has been requested send the line */
1345                 if (cstate->header_line)
1346                 {
1347                         bool            hdr_delim = false;
1348
1349                         foreach(cur, cstate->attnumlist)
1350                         {
1351                                 int                     attnum = lfirst_int(cur);
1352                                 char       *colname;
1353
1354                                 if (hdr_delim)
1355                                         CopySendChar(cstate, cstate->delim[0]);
1356                                 hdr_delim = true;
1357
1358                                 colname = NameStr(attr[attnum - 1]->attname);
1359
1360                                 CopyAttributeOutCSV(cstate, colname, false,
1361                                                                         list_length(cstate->attnumlist) == 1);
1362                         }
1363
1364                         CopySendEndOfRow(cstate);
1365                 }
1366         }
1367
1368         if (cstate->rel)
1369         {
1370                 Datum      *values;
1371                 bool       *nulls;
1372                 HeapScanDesc scandesc;
1373                 HeapTuple       tuple;
1374
1375                 values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1376                 nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
1377
1378                 scandesc = heap_beginscan(cstate->rel, ActiveSnapshot, 0, NULL);
1379
1380                 while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
1381                 {
1382                         CHECK_FOR_INTERRUPTS();
1383
1384                         /* Deconstruct the tuple ... faster than repeated heap_getattr */
1385                         heap_deform_tuple(tuple, tupDesc, values, nulls);
1386
1387                         /* Format and send the data */
1388                         CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
1389                 }
1390
1391                 heap_endscan(scandesc);
1392         }
1393         else
1394         {
1395                 /* run the plan --- the dest receiver will send tuples */
1396                 ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
1397         }
1398
1399         if (cstate->binary)
1400         {
1401                 /* Generate trailer for a binary copy */
1402                 CopySendInt16(cstate, -1);
1403                 /* Need to flush out the trailer */
1404                 CopySendEndOfRow(cstate);
1405         }
1406
1407         MemoryContextDelete(cstate->rowcontext);
1408 }
1409
1410 /*
1411  * Emit one row during CopyTo().
1412  */
1413 static void
1414 CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
1415 {
1416         bool            need_delim = false;
1417         FmgrInfo   *out_functions = cstate->out_functions;
1418         MemoryContext oldcontext;
1419         ListCell   *cur;
1420         char       *string;
1421
1422         MemoryContextReset(cstate->rowcontext);
1423         oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1424
1425         if (cstate->binary)
1426         {
1427                 /* Binary per-tuple header */
1428                 CopySendInt16(cstate, list_length(cstate->attnumlist));
1429                 /* Send OID if wanted --- note attnumlist doesn't include it */
1430                 if (cstate->oids)
1431                 {
1432                         /* Hack --- assume Oid is same size as int32 */
1433                         CopySendInt32(cstate, sizeof(int32));
1434                         CopySendInt32(cstate, tupleOid);
1435                 }
1436         }
1437         else
1438         {
1439                 /* Text format has no per-tuple header, but send OID if wanted */
1440                 /* Assume digits don't need any quoting or encoding conversion */
1441                 if (cstate->oids)
1442                 {
1443                         string = DatumGetCString(DirectFunctionCall1(oidout,
1444                                                                                                 ObjectIdGetDatum(tupleOid)));
1445                         CopySendString(cstate, string);
1446                         need_delim = true;
1447                 }
1448         }
1449
1450         foreach(cur, cstate->attnumlist)
1451         {
1452                 int                     attnum = lfirst_int(cur);
1453                 Datum           value = values[attnum - 1];
1454                 bool            isnull = nulls[attnum - 1];
1455
1456                 if (!cstate->binary)
1457                 {
1458                         if (need_delim)
1459                                 CopySendChar(cstate, cstate->delim[0]);
1460                         need_delim = true;
1461                 }
1462
1463                 if (isnull)
1464                 {
1465                         if (!cstate->binary)
1466                                 CopySendString(cstate, cstate->null_print_client);
1467                         else
1468                                 CopySendInt32(cstate, -1);
1469                 }
1470                 else
1471                 {
1472                         if (!cstate->binary)
1473                         {
1474                                 string = OutputFunctionCall(&out_functions[attnum - 1],
1475                                                                                         value);
1476                                 if (cstate->csv_mode)
1477                                         CopyAttributeOutCSV(cstate, string,
1478                                                                                 cstate->force_quote_flags[attnum - 1],
1479                                                                                 list_length(cstate->attnumlist) == 1);
1480                                 else
1481                                         CopyAttributeOutText(cstate, string);
1482                         }
1483                         else
1484                         {
1485                                 bytea      *outputbytes;
1486
1487                                 outputbytes = SendFunctionCall(&out_functions[attnum - 1],
1488                                                                                            value);
1489                                 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
1490                                 CopySendData(cstate, VARDATA(outputbytes),
1491                                                          VARSIZE(outputbytes) - VARHDRSZ);
1492                         }
1493                 }
1494         }
1495
1496         CopySendEndOfRow(cstate);
1497
1498         MemoryContextSwitchTo(oldcontext);
1499
1500         cstate->processed++;
1501 }
1502
1503
1504 /*
1505  * error context callback for COPY FROM
1506  */
1507 static void
1508 copy_in_error_callback(void *arg)
1509 {
1510         CopyState       cstate = (CopyState) arg;
1511
1512         if (cstate->binary)
1513         {
1514                 /* can't usefully display the data */
1515                 if (cstate->cur_attname)
1516                         errcontext("COPY %s, line %d, column %s",
1517                                            cstate->cur_relname, cstate->cur_lineno,
1518                                            cstate->cur_attname);
1519                 else
1520                         errcontext("COPY %s, line %d",
1521                                            cstate->cur_relname, cstate->cur_lineno);
1522         }
1523         else
1524         {
1525                 if (cstate->cur_attname && cstate->cur_attval)
1526                 {
1527                         /* error is relevant to a particular column */
1528                         char       *attval;
1529
1530                         attval = limit_printout_length(cstate->cur_attval);
1531                         errcontext("COPY %s, line %d, column %s: \"%s\"",
1532                                            cstate->cur_relname, cstate->cur_lineno,
1533                                            cstate->cur_attname, attval);
1534                         pfree(attval);
1535                 }
1536                 else if (cstate->cur_attname)
1537                 {
1538                         /* error is relevant to a particular column, value is NULL */
1539                         errcontext("COPY %s, line %d, column %s: null input",
1540                                            cstate->cur_relname, cstate->cur_lineno,
1541                                            cstate->cur_attname);
1542                 }
1543                 else
1544                 {
1545                         /* error is relevant to a particular line */
1546                         if (cstate->line_buf_converted || !cstate->need_transcoding)
1547                         {
1548                                 char       *lineval;
1549
1550                                 lineval = limit_printout_length(cstate->line_buf.data);
1551                                 errcontext("COPY %s, line %d: \"%s\"",
1552                                                    cstate->cur_relname, cstate->cur_lineno, lineval);
1553                                 pfree(lineval);
1554                         }
1555                         else
1556                         {
1557                                 /*
1558                                  * Here, the line buffer is still in a foreign encoding, and
1559                                  * indeed it's quite likely that the error is precisely a
1560                                  * failure to do encoding conversion (ie, bad data).  We dare
1561                                  * not try to convert it, and at present there's no way to
1562                                  * regurgitate it without conversion.  So we have to punt and
1563                                  * just report the line number.
1564                                  */
1565                                 errcontext("COPY %s, line %d",
1566                                                    cstate->cur_relname, cstate->cur_lineno);
1567                         }
1568                 }
1569         }
1570 }
1571
/*
 * limit_printout_length --- keep COPY data shown in a message reasonably short.
 *
 * Returns a freshly allocated copy of str.  Inputs of at most
 * MAX_COPY_DATA_DISPLAY bytes are duplicated unchanged; longer ones are cut
 * at the length chosen by pg_mbcliplen and get "..." appended to show that
 * truncation happened.
 *
 * Using the sprintf "precision" limit would look simpler, but some versions
 * of glibc have a bug/misfeature that makes vsnprintf fail (return -1) when
 * asked to truncate a string containing byte sequences that are invalid in
 * the current encoding.  Hence the truncation is done by hand here.
 */
static char *
limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

        int                     input_len = strlen(str);
        int                     keep_len;
        char       *clipped;

        if (input_len > MAX_COPY_DATA_DISPLAY)
        {
                /* Apply encoding-dependent truncation */
                keep_len = pg_mbcliplen(str, input_len, MAX_COPY_DATA_DISPLAY);

                /* Room for the kept bytes, the three dots and the terminator */
                clipped = (char *) palloc(keep_len + 4);
                memcpy(clipped, str, keep_len);
                memcpy(clipped + keep_len, "...", 4);

                return clipped;
        }

        /* Short enough already: hand back a plain copy */
        return pstrdup(str);
}
1606
1607 /*
1608  * Copy FROM file to relation.
1609  */
1610 static void
1611 CopyFrom(CopyState cstate)
1612 {
1613         bool            pipe = (cstate->filename == NULL);
1614         HeapTuple       tuple;
1615         TupleDesc       tupDesc;
1616         Form_pg_attribute *attr;
1617         AttrNumber      num_phys_attrs,
1618                                 attr_count,
1619                                 num_defaults;
1620         FmgrInfo   *in_functions;
1621         FmgrInfo        oid_in_function;
1622         Oid                *typioparams;
1623         Oid                     oid_typioparam;
1624         int                     attnum;
1625         int                     i;
1626         Oid                     in_func_oid;
1627         Datum      *values;
1628         char       *nulls;
1629         int                     nfields;
1630         char      **field_strings;
1631         bool            done = false;
1632         bool            isnull;
1633         ResultRelInfo *resultRelInfo;
1634         EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
1635         TupleTableSlot *slot;
1636         bool            file_has_oids;
1637         int                *defmap;
1638         ExprState **defexprs;           /* array of default att expressions */
1639         ExprContext *econtext;          /* used for ExecEvalExpr for default atts */
1640         MemoryContext oldcontext = CurrentMemoryContext;
1641         ErrorContextCallback errcontext;
1642         CommandId       mycid = GetCurrentCommandId();
1643         bool            use_wal = true;         /* by default, use WAL logging */
1644         bool            use_fsm = true;         /* by default, use FSM for free space */
1645
1646         Assert(cstate->rel);
1647
1648         if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
1649         {
1650                 if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
1651                         ereport(ERROR,
1652                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1653                                          errmsg("cannot copy to view \"%s\"",
1654                                                         RelationGetRelationName(cstate->rel))));
1655                 else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
1656                         ereport(ERROR,
1657                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1658                                          errmsg("cannot copy to sequence \"%s\"",
1659                                                         RelationGetRelationName(cstate->rel))));
1660                 else
1661                         ereport(ERROR,
1662                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1663                                          errmsg("cannot copy to non-table relation \"%s\"",
1664                                                         RelationGetRelationName(cstate->rel))));
1665         }
1666
1667         /*----------
1668          * Check to see if we can avoid writing WAL
1669          *
1670          * If archive logging is not enabled *and* either
1671          *      - table was created in same transaction as this COPY
1672          *      - data is being written to relfilenode created in this transaction
1673          * then we can skip writing WAL.  It's safe because if the transaction
1674          * doesn't commit, we'll discard the table (or the new relfilenode file).
1675          * If it does commit, we'll have done the heap_sync at the bottom of this
1676          * routine first.
1677          *
1678          * As mentioned in comments in utils/rel.h, the in-same-transaction test
1679          * is not completely reliable, since in rare cases rd_createSubid or
1680          * rd_newRelfilenodeSubid can be cleared before the end of the transaction.
1681          * However this is OK since at worst we will fail to make the optimization.
1682          *
1683          * When skipping WAL it's entirely possible that COPY itself will write no
1684          * WAL records at all.  This is of concern because RecordTransactionCommit
1685          * might decide it doesn't need to log our eventual commit, which we
1686          * certainly need it to do.  However, we need no special action here for
1687          * that, because if we have a new table or new relfilenode then there
1688          * must have been a WAL-logged pg_class update earlier in the transaction.
1689          *
1690          * Also, if the target file is new-in-transaction, we assume that checking
1691          * FSM for free space is a waste of time, even if we must use WAL because
1692          * of archiving.  This could possibly be wrong, but it's unlikely.
1693          *
1694          * The comments for heap_insert and RelationGetBufferForTuple specify that
1695          * skipping WAL logging is only safe if we ensure that our tuples do not
1696          * go into pages containing tuples from any other transactions --- but this
1697          * must be the case if we have a new table or new relfilenode, so we need
1698          * no additional work to enforce that.
1699          *----------
1700          */
1701         if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
1702                 cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
1703         {
1704                 use_fsm = false;
1705                 if (!XLogArchivingActive())
1706                         use_wal = false;
1707         }
1708
1709         if (pipe)
1710         {
1711                 if (whereToSendOutput == DestRemote)
1712                         ReceiveCopyBegin(cstate);
1713                 else
1714                         cstate->copy_file = stdin;
1715         }
1716         else
1717         {
1718                 struct stat st;
1719
1720                 cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
1721
1722                 if (cstate->copy_file == NULL)
1723                         ereport(ERROR,
1724                                         (errcode_for_file_access(),
1725                                          errmsg("could not open file \"%s\" for reading: %m",
1726                                                         cstate->filename)));
1727
1728                 fstat(fileno(cstate->copy_file), &st);
1729                 if (S_ISDIR(st.st_mode))
1730                 {
1731                         FreeFile(cstate->copy_file);
1732                         ereport(ERROR,
1733                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1734                                          errmsg("\"%s\" is a directory", cstate->filename)));
1735                 }
1736         }
1737
1738         tupDesc = RelationGetDescr(cstate->rel);
1739         attr = tupDesc->attrs;
1740         num_phys_attrs = tupDesc->natts;
1741         attr_count = list_length(cstate->attnumlist);
1742         num_defaults = 0;
1743
1744         /*
1745          * We need a ResultRelInfo so we can use the regular executor's
1746          * index-entry-making machinery.  (There used to be a huge amount of code
1747          * here that basically duplicated execUtils.c ...)
1748          */
1749         resultRelInfo = makeNode(ResultRelInfo);
1750         resultRelInfo->ri_RangeTableIndex = 1;          /* dummy */
1751         resultRelInfo->ri_RelationDesc = cstate->rel;
1752         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(cstate->rel->trigdesc);
1753         if (resultRelInfo->ri_TrigDesc)
1754                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
1755                         palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
1756         resultRelInfo->ri_TrigInstrument = NULL;
1757
1758         ExecOpenIndices(resultRelInfo);
1759
1760         estate->es_result_relations = resultRelInfo;
1761         estate->es_num_result_relations = 1;
1762         estate->es_result_relation_info = resultRelInfo;
1763
1764         /* Set up a tuple slot too */
1765         slot = MakeSingleTupleTableSlot(tupDesc);
1766
1767         econtext = GetPerTupleExprContext(estate);
1768
1769         /*
1770          * Pick up the required catalog information for each attribute in the
1771          * relation, including the input function, the element type (to pass to
1772          * the input function), and info about defaults and constraints. (Which
1773          * input function we use depends on text/binary format choice.)
1774          */
1775         in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1776         typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
1777         defmap = (int *) palloc(num_phys_attrs * sizeof(int));
1778         defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
1779
1780         for (attnum = 1; attnum <= num_phys_attrs; attnum++)
1781         {
1782                 /* We don't need info for dropped attributes */
1783                 if (attr[attnum - 1]->attisdropped)
1784                         continue;
1785
1786                 /* Fetch the input function and typioparam info */
1787                 if (cstate->binary)
1788                         getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
1789                                                                    &in_func_oid, &typioparams[attnum - 1]);
1790                 else
1791                         getTypeInputInfo(attr[attnum - 1]->atttypid,
1792                                                          &in_func_oid, &typioparams[attnum - 1]);
1793                 fmgr_info(in_func_oid, &in_functions[attnum - 1]);
1794
1795                 /* Get default info if needed */
1796                 if (!list_member_int(cstate->attnumlist, attnum))
1797                 {
1798                         /* attribute is NOT to be copied from input */
1799                         /* use default value if one exists */
1800                         Node       *defexpr = build_column_default(cstate->rel, attnum);
1801
1802                         if (defexpr != NULL)
1803                         {
1804                                 defexprs[num_defaults] = ExecPrepareExpr((Expr *) defexpr,
1805                                                                                                                  estate);
1806                                 defmap[num_defaults] = attnum - 1;
1807                                 num_defaults++;
1808                         }
1809                 }
1810         }
1811
1812         /* Prepare to catch AFTER triggers. */
1813         AfterTriggerBeginQuery();
1814
1815         /*
1816          * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
1817          * should do this for COPY, since it's not really an "INSERT" statement as
1818          * such. However, executing these triggers maintains consistency with the
1819          * EACH ROW triggers that we already fire on COPY.
1820          */
1821         ExecBSInsertTriggers(estate, resultRelInfo);
1822
1823         if (!cstate->binary)
1824                 file_has_oids = cstate->oids;   /* must rely on user to tell us... */
1825         else
1826         {
1827                 /* Read and verify binary header */
1828                 char            readSig[11];
1829                 int32           tmp;
1830
1831                 /* Signature */
1832                 if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
1833                         memcmp(readSig, BinarySignature, 11) != 0)
1834                         ereport(ERROR,
1835                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1836                                          errmsg("COPY file signature not recognized")));
1837                 /* Flags field */
1838                 if (!CopyGetInt32(cstate, &tmp))
1839                         ereport(ERROR,
1840                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1841                                          errmsg("invalid COPY file header (missing flags)")));
1842                 file_has_oids = (tmp & (1 << 16)) != 0;
1843                 tmp &= ~(1 << 16);
1844                 if ((tmp >> 16) != 0)
1845                         ereport(ERROR,
1846                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1847                                  errmsg("unrecognized critical flags in COPY file header")));
1848                 /* Header extension length */
1849                 if (!CopyGetInt32(cstate, &tmp) ||
1850                         tmp < 0)
1851                         ereport(ERROR,
1852                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1853                                          errmsg("invalid COPY file header (missing length)")));
1854                 /* Skip extension header, if present */
1855                 while (tmp-- > 0)
1856                 {
1857                         if (CopyGetData(cstate, readSig, 1, 1) != 1)
1858                                 ereport(ERROR,
1859                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1860                                                  errmsg("invalid COPY file header (wrong length)")));
1861                 }
1862         }
1863
1864         if (file_has_oids && cstate->binary)
1865         {
1866                 getTypeBinaryInputInfo(OIDOID,
1867                                                            &in_func_oid, &oid_typioparam);
1868                 fmgr_info(in_func_oid, &oid_in_function);
1869         }
1870
1871         values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
1872         nulls = (char *) palloc(num_phys_attrs * sizeof(char));
1873
1874         /* create workspace for CopyReadAttributes results */
1875         nfields = file_has_oids ? (attr_count + 1) : attr_count;
1876         field_strings = (char **) palloc(nfields * sizeof(char *));
1877
1878         /* Initialize state variables */
1879         cstate->fe_eof = false;
1880         cstate->eol_type = EOL_UNKNOWN;
1881         cstate->cur_relname = RelationGetRelationName(cstate->rel);
1882         cstate->cur_lineno = 0;
1883         cstate->cur_attname = NULL;
1884         cstate->cur_attval = NULL;
1885
1886         /* Set up callback to identify error line number */
1887         errcontext.callback = copy_in_error_callback;
1888         errcontext.arg = (void *) cstate;
1889         errcontext.previous = error_context_stack;
1890         error_context_stack = &errcontext;
1891
1892         /* on input just throw the header line away */
1893         if (cstate->header_line)
1894         {
1895                 cstate->cur_lineno++;
1896                 done = CopyReadLine(cstate);
1897         }
1898
1899         while (!done)
1900         {
1901                 bool            skip_tuple;
1902                 Oid                     loaded_oid = InvalidOid;
1903
1904                 CHECK_FOR_INTERRUPTS();
1905
1906                 cstate->cur_lineno++;
1907
1908                 /* Reset the per-tuple exprcontext */
1909                 ResetPerTupleExprContext(estate);
1910
1911                 /* Switch into its memory context */
1912                 MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1913
1914                 /* Initialize all values for row to NULL */
1915                 MemSet(values, 0, num_phys_attrs * sizeof(Datum));
1916                 MemSet(nulls, 'n', num_phys_attrs * sizeof(char));
1917
1918                 if (!cstate->binary)
1919                 {
1920                         ListCell   *cur;
1921                         int                     fldct;
1922                         int                     fieldno;
1923                         char       *string;
1924
1925                         /* Actually read the line into memory here */
1926                         done = CopyReadLine(cstate);
1927
1928                         /*
1929                          * EOF at start of line means we're done.  If we see EOF after
1930                          * some characters, we act as though it was newline followed by
1931                          * EOF, ie, process the line and then exit loop on next iteration.
1932                          */
1933                         if (done && cstate->line_buf.len == 0)
1934                                 break;
1935
1936                         /* Parse the line into de-escaped field values */
1937                         if (cstate->csv_mode)
1938                                 fldct = CopyReadAttributesCSV(cstate, nfields, field_strings);
1939                         else
1940                                 fldct = CopyReadAttributesText(cstate, nfields, field_strings);
1941                         fieldno = 0;
1942
1943                         /* Read the OID field if present */
1944                         if (file_has_oids)
1945                         {
1946                                 if (fieldno >= fldct)
1947                                         ereport(ERROR,
1948                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1949                                                          errmsg("missing data for OID column")));
1950                                 string = field_strings[fieldno++];
1951
1952                                 if (string == NULL)
1953                                         ereport(ERROR,
1954                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1955                                                          errmsg("null OID in COPY data")));
1956                                 else
1957                                 {
1958                                         cstate->cur_attname = "oid";
1959                                         cstate->cur_attval = string;
1960                                         loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
1961                                                                                                    CStringGetDatum(string)));
1962                                         if (loaded_oid == InvalidOid)
1963                                                 ereport(ERROR,
1964                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1965                                                                  errmsg("invalid OID in COPY data")));
1966                                         cstate->cur_attname = NULL;
1967                                         cstate->cur_attval = NULL;
1968                                 }
1969                         }
1970
1971                         /* Loop to read the user attributes on the line. */
1972                         foreach(cur, cstate->attnumlist)
1973                         {
1974                                 int                     attnum = lfirst_int(cur);
1975                                 int                     m = attnum - 1;
1976
1977                                 if (fieldno >= fldct)
1978                                         ereport(ERROR,
1979                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1980                                                          errmsg("missing data for column \"%s\"",
1981                                                                         NameStr(attr[m]->attname))));
1982                                 string = field_strings[fieldno++];
1983
1984                                 if (cstate->csv_mode && string == NULL &&
1985                                         cstate->force_notnull_flags[m])
1986                                 {
1987                                         /* Go ahead and read the NULL string */
1988                                         string = cstate->null_print;
1989                                 }
1990
1991                                 cstate->cur_attname = NameStr(attr[m]->attname);
1992                                 cstate->cur_attval = string;
1993                                 values[m] = InputFunctionCall(&in_functions[m],
1994                                                                                           string,
1995                                                                                           typioparams[m],
1996                                                                                           attr[m]->atttypmod);
1997                                 if (string != NULL)
1998                                         nulls[m] = ' ';
1999                                 cstate->cur_attname = NULL;
2000                                 cstate->cur_attval = NULL;
2001                         }
2002
2003                         Assert(fieldno == nfields);
2004                 }
2005                 else
2006                 {
2007                         /* binary */
2008                         int16           fld_count;
2009                         ListCell   *cur;
2010
2011                         if (!CopyGetInt16(cstate, &fld_count) ||
2012                                 fld_count == -1)
2013                         {
2014                                 done = true;
2015                                 break;
2016                         }
2017
2018                         if (fld_count != attr_count)
2019                                 ereport(ERROR,
2020                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2021                                                  errmsg("row field count is %d, expected %d",
2022                                                                 (int) fld_count, attr_count)));
2023
2024                         if (file_has_oids)
2025                         {
2026                                 cstate->cur_attname = "oid";
2027                                 loaded_oid =
2028                                         DatumGetObjectId(CopyReadBinaryAttribute(cstate,
2029                                                                                                                          0,
2030                                                                                                                          &oid_in_function,
2031                                                                                                                          oid_typioparam,
2032                                                                                                                          -1,
2033                                                                                                                          &isnull));
2034                                 if (isnull || loaded_oid == InvalidOid)
2035                                         ereport(ERROR,
2036                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2037                                                          errmsg("invalid OID in COPY data")));
2038                                 cstate->cur_attname = NULL;
2039                         }
2040
2041                         i = 0;
2042                         foreach(cur, cstate->attnumlist)
2043                         {
2044                                 int                     attnum = lfirst_int(cur);
2045                                 int                     m = attnum - 1;
2046
2047                                 cstate->cur_attname = NameStr(attr[m]->attname);
2048                                 i++;
2049                                 values[m] = CopyReadBinaryAttribute(cstate,
2050                                                                                                         i,
2051                                                                                                         &in_functions[m],
2052                                                                                                         typioparams[m],
2053                                                                                                         attr[m]->atttypmod,
2054                                                                                                         &isnull);
2055                                 nulls[m] = isnull ? 'n' : ' ';
2056                                 cstate->cur_attname = NULL;
2057                         }
2058                 }
2059
2060                 /*
2061                  * Now compute and insert any defaults available for the columns not
2062                  * provided by the input data.  Anything not processed here or above
2063                  * will remain NULL.
2064                  */
2065                 for (i = 0; i < num_defaults; i++)
2066                 {
2067                         values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
2068                                                                                          &isnull, NULL);
2069                         if (!isnull)
2070                                 nulls[defmap[i]] = ' ';
2071                 }
2072
2073                 /* And now we can form the input tuple. */
2074                 tuple = heap_formtuple(tupDesc, values, nulls);
2075
2076                 if (cstate->oids && file_has_oids)
2077                         HeapTupleSetOid(tuple, loaded_oid);
2078
2079                 /* Triggers and stuff need to be invoked in query context. */
2080                 MemoryContextSwitchTo(oldcontext);
2081
2082                 skip_tuple = false;
2083
2084                 /* BEFORE ROW INSERT Triggers */
2085                 if (resultRelInfo->ri_TrigDesc &&
2086                   resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
2087                 {
2088                         HeapTuple       newtuple;
2089
2090                         newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
2091
2092                         if (newtuple == NULL)           /* "do nothing" */
2093                                 skip_tuple = true;
2094                         else if (newtuple != tuple) /* modified by Trigger(s) */
2095                         {
2096                                 heap_freetuple(tuple);
2097                                 tuple = newtuple;
2098                         }
2099                 }
2100
2101                 if (!skip_tuple)
2102                 {
2103                         /* Place tuple in tuple slot */
2104                         ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2105
2106                         /* Check the constraints of the tuple */
2107                         if (cstate->rel->rd_att->constr)
2108                                 ExecConstraints(resultRelInfo, slot, estate);
2109
2110                         /* OK, store the tuple and create index entries for it */
2111                         heap_insert(cstate->rel, tuple, mycid, use_wal, use_fsm);
2112
2113                         if (resultRelInfo->ri_NumIndices > 0)
2114                                 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
2115
2116                         /* AFTER ROW INSERT Triggers */
2117                         ExecARInsertTriggers(estate, resultRelInfo, tuple);
2118
2119                         /*
2120                          * We count only tuples not suppressed by a BEFORE INSERT trigger;
2121                          * this is the same definition used by execMain.c for counting
2122                          * tuples inserted by an INSERT command.
2123                          */
2124                         cstate->processed++;
2125                 }
2126         }
2127
2128         /* Done, clean up */
2129         error_context_stack = errcontext.previous;
2130
2131         MemoryContextSwitchTo(oldcontext);
2132
2133         /* Execute AFTER STATEMENT insertion triggers */
2134         ExecASInsertTriggers(estate, resultRelInfo);
2135
2136         /* Handle queued AFTER triggers */
2137         AfterTriggerEndQuery(estate);
2138
2139         pfree(values);
2140         pfree(nulls);
2141         pfree(field_strings);
2142
2143         pfree(in_functions);
2144         pfree(typioparams);
2145         pfree(defmap);
2146         pfree(defexprs);
2147
2148         ExecDropSingleTupleTableSlot(slot);
2149
2150         ExecCloseIndices(resultRelInfo);
2151
2152         FreeExecutorState(estate);
2153
2154         if (!pipe)
2155         {
2156                 if (FreeFile(cstate->copy_file))
2157                         ereport(ERROR,
2158                                         (errcode_for_file_access(),
2159                                          errmsg("could not read from file \"%s\": %m",
2160                                                         cstate->filename)));
2161         }
2162
2163         /*
2164          * If we skipped writing WAL, then we need to sync the heap (but not
2165          * indexes since those use WAL anyway)
2166          */
2167         if (!use_wal)
2168                 heap_sync(cstate->rel);
2169 }
2170
2171
2172 /*
2173  * Read the next input line and stash it in line_buf, with conversion to
2174  * server encoding.
2175  *
2176  * Result is true if read was terminated by EOF, false if terminated
2177  * by newline.  The terminating newline or EOF marker is not included
2178  * in the final value of line_buf.
2179  */
2180 static bool
2181 CopyReadLine(CopyState cstate)
2182 {
2183         bool            result;
2184
2185         resetStringInfo(&cstate->line_buf);
2186
2187         /* Mark that encoding conversion hasn't occurred yet */
2188         cstate->line_buf_converted = false;
2189
2190         /* Parse data and transfer into line_buf */
2191         result = CopyReadLineText(cstate);
2192
2193         if (result)
2194         {
2195                 /*
2196                  * Reached EOF.  In protocol version 3, we should ignore anything
2197                  * after \. up to the protocol end of copy data.  (XXX maybe better
2198                  * not to treat \. as special?)
2199                  */
2200                 if (cstate->copy_dest == COPY_NEW_FE)
2201                 {
2202                         do
2203                         {
2204                                 cstate->raw_buf_index = cstate->raw_buf_len;
2205                         } while (CopyLoadRawBuf(cstate));
2206                 }
2207         }
2208         else
2209         {
2210                 /*
2211                  * If we didn't hit EOF, then we must have transferred the EOL marker
2212                  * to line_buf along with the data.  Get rid of it.
2213                  */
2214                 switch (cstate->eol_type)
2215                 {
2216                         case EOL_NL:
2217                                 Assert(cstate->line_buf.len >= 1);
2218                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2219                                 cstate->line_buf.len--;
2220                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2221                                 break;
2222                         case EOL_CR:
2223                                 Assert(cstate->line_buf.len >= 1);
2224                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
2225                                 cstate->line_buf.len--;
2226                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2227                                 break;
2228                         case EOL_CRNL:
2229                                 Assert(cstate->line_buf.len >= 2);
2230                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
2231                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
2232                                 cstate->line_buf.len -= 2;
2233                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
2234                                 break;
2235                         case EOL_UNKNOWN:
2236                                 /* shouldn't get here */
2237                                 Assert(false);
2238                                 break;
2239                 }
2240         }
2241
2242         /* Done reading the line.  Convert it to server encoding. */
2243         if (cstate->need_transcoding)
2244         {
2245                 char       *cvt;
2246
2247                 cvt = pg_client_to_server(cstate->line_buf.data,
2248                                                                   cstate->line_buf.len);
2249                 if (cvt != cstate->line_buf.data)
2250                 {
2251                         /* transfer converted data back to line_buf */
2252                         resetStringInfo(&cstate->line_buf);
2253                         appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
2254                         pfree(cvt);
2255                 }
2256         }
2257
2258         /* Now it's safe to use the buffer in error messages */
2259         cstate->line_buf_converted = true;
2260
2261         return result;
2262 }
2263
2264 /*
2265  * CopyReadLineText - inner loop of CopyReadLine for text mode
2266  */
2267 static bool
2268 CopyReadLineText(CopyState cstate)
2269 {
2270         char       *copy_raw_buf;
2271         int                     raw_buf_ptr;
2272         int                     copy_buf_len;
2273         bool            need_data = false;
2274         bool            hit_eof = false;
2275         bool            result = false;
2276         char            mblen_str[2];
2277
2278         /* CSV variables */
2279         bool            first_char_in_line = true;
2280         bool            in_quote = false,
2281                                 last_was_esc = false;
2282         char            quotec = '\0';
2283         char            escapec = '\0';
2284
2285         if (cstate->csv_mode)
2286         {
2287                 quotec = cstate->quote[0];
2288                 escapec = cstate->escape[0];
2289                 /* ignore special escape processing if it's the same as quotec */
2290                 if (quotec == escapec)
2291                         escapec = '\0';
2292         }
2293
2294         mblen_str[1] = '\0';
2295
2296         /*
2297          * The objective of this loop is to transfer the entire next input line
2298          * into line_buf.  Hence, we only care for detecting newlines (\r and/or
2299          * \n) and the end-of-copy marker (\.).
2300          *
2301          * In CSV mode, \r and \n inside a quoted field are just part of the data
2302          * value and are put in line_buf.  We keep just enough state to know if we
2303          * are currently in a quoted field or not.
2304          *
2305          * These four characters, and the CSV escape and quote characters, are
2306          * assumed the same in frontend and backend encodings.
2307          *
2308          * For speed, we try to move data from raw_buf to line_buf in chunks
2309          * rather than one character at a time.  raw_buf_ptr points to the next
2310          * character to examine; any characters from raw_buf_index to raw_buf_ptr
2311          * have been determined to be part of the line, but not yet transferred to
2312          * line_buf.
2313          *
2314          * For a little extra speed within the loop, we copy raw_buf and
2315          * raw_buf_len into local variables.
2316          */
2317         copy_raw_buf = cstate->raw_buf;
2318         raw_buf_ptr = cstate->raw_buf_index;
2319         copy_buf_len = cstate->raw_buf_len;
2320
2321         for (;;)
2322         {
2323                 int                     prev_raw_ptr;
2324                 char            c;
2325
2326                 /*
2327                  * Load more data if needed.  Ideally we would just force four bytes
2328                  * of read-ahead and avoid the many calls to
2329                  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
2330                  * does not allow us to read too far ahead or we might read into the
2331                  * next data, so we read-ahead only as far as we know we can.  One
2332                  * optimization would be to read-ahead four bytes here if
2333                  * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
2334                  * considering the size of the buffer.
2335                  */
2336                 if (raw_buf_ptr >= copy_buf_len || need_data)
2337                 {
2338                         REFILL_LINEBUF;
2339
2340                         /*
2341                          * Try to read some more data.  This will certainly reset
2342                          * raw_buf_index to zero, and raw_buf_ptr must go with it.
2343                          */
2344                         if (!CopyLoadRawBuf(cstate))
2345                                 hit_eof = true;
2346                         raw_buf_ptr = 0;
2347                         copy_buf_len = cstate->raw_buf_len;
2348
2349                         /*
2350                          * If we are completely out of data, break out of the loop,
2351                          * reporting EOF.
2352                          */
2353                         if (copy_buf_len <= 0)
2354                         {
2355                                 result = true;
2356                                 break;
2357                         }
2358                         need_data = false;
2359                 }
2360
2361                 /* OK to fetch a character */
2362                 prev_raw_ptr = raw_buf_ptr;
2363                 c = copy_raw_buf[raw_buf_ptr++];
2364
2365                 if (cstate->csv_mode)
2366                 {
2367                         /*
2368                          * If character is '\\' or '\r', we may need to look ahead below.
2369                          * Force fetch of the next character if we don't already have it.
2370                          * We need to do this before changing CSV state, in case one of
2371                          * these characters is also the quote or escape character.
2372                          *
2373                          * Note: old-protocol does not like forced prefetch, but it's OK
2374                          * here since we cannot validly be at EOF.
2375                          */
2376                         if (c == '\\' || c == '\r')
2377                         {
2378                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2379                         }
2380
2381                         /*
2382                          * Dealing with quotes and escapes here is mildly tricky. If the
2383                          * quote char is also the escape char, there's no problem - we
2384                          * just use the char as a toggle. If they are different, we need
2385                          * to ensure that we only take account of an escape inside a
2386                          * quoted field and immediately preceding a quote char, and not
2387                          * the second in a escape-escape sequence.
2388                          */
2389                         if (in_quote && c == escapec)
2390                                 last_was_esc = !last_was_esc;
2391                         if (c == quotec && !last_was_esc)
2392                                 in_quote = !in_quote;
2393                         if (c != escapec)
2394                                 last_was_esc = false;
2395
2396                         /*
2397                          * Updating the line count for embedded CR and/or LF chars is
2398                          * necessarily a little fragile - this test is probably about the
2399                          * best we can do.      (XXX it's arguable whether we should do this
2400                          * at all --- is cur_lineno a physical or logical count?)
2401                          */
2402                         if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
2403                                 cstate->cur_lineno++;
2404                 }
2405
2406                 /* Process \r */
2407                 if (c == '\r' && (!cstate->csv_mode || !in_quote))
2408                 {
2409                         /* Check for \r\n on first line, _and_ handle \r\n. */
2410                         if (cstate->eol_type == EOL_UNKNOWN ||
2411                                 cstate->eol_type == EOL_CRNL)
2412                         {
2413                                 /*
2414                                  * If need more data, go back to loop top to load it.
2415                                  *
2416                                  * Note that if we are at EOF, c will wind up as '\0' because
2417                                  * of the guaranteed pad of raw_buf.
2418                                  */
2419                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2420
2421                                 /* get next char */
2422                                 c = copy_raw_buf[raw_buf_ptr];
2423
2424                                 if (c == '\n')
2425                                 {
2426                                         raw_buf_ptr++;          /* eat newline */
2427                                         cstate->eol_type = EOL_CRNL;            /* in case not set yet */
2428                                 }
2429                                 else
2430                                 {
2431                                         /* found \r, but no \n */
2432                                         if (cstate->eol_type == EOL_CRNL)
2433                                                 ereport(ERROR,
2434                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2435                                                                  errmsg(!cstate->csv_mode ?
2436                                                                         "literal carriage return found in data" :
2437                                                                    "unquoted carriage return found in data"),
2438                                                                  errhint(!cstate->csv_mode ?
2439                                                                 "Use \"\\r\" to represent carriage return." :
2440                                                                                  "Use quoted CSV field to represent carriage return.")));
2441
2442                                         /*
2443                                          * if we got here, it is the first line and we didn't find
2444                                          * \n, so don't consume the peeked character
2445                                          */
2446                                         cstate->eol_type = EOL_CR;
2447                                 }
2448                         }
2449                         else if (cstate->eol_type == EOL_NL)
2450                                 ereport(ERROR,
2451                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2452                                                  errmsg(!cstate->csv_mode ?
2453                                                                 "literal carriage return found in data" :
2454                                                                 "unquoted carriage return found in data"),
2455                                                  errhint(!cstate->csv_mode ?
2456                                                                  "Use \"\\r\" to represent carriage return." :
2457                                          "Use quoted CSV field to represent carriage return.")));
2458                         /* If reach here, we have found the line terminator */
2459                         break;
2460                 }
2461
2462                 /* Process \n */
2463                 if (c == '\n' && (!cstate->csv_mode || !in_quote))
2464                 {
2465                         if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
2466                                 ereport(ERROR,
2467                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2468                                                  errmsg(!cstate->csv_mode ?
2469                                                                 "literal newline found in data" :
2470                                                                 "unquoted newline found in data"),
2471                                                  errhint(!cstate->csv_mode ?
2472                                                                  "Use \"\\n\" to represent newline." :
2473                                                          "Use quoted CSV field to represent newline.")));
2474                         cstate->eol_type = EOL_NL;      /* in case not set yet */
2475                         /* If reach here, we have found the line terminator */
2476                         break;
2477                 }
2478
2479                 /*
2480                  * In CSV mode, we only recognize \. alone on a line.  This is because
2481                  * \. is a valid CSV data value.
2482                  */
2483                 if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
2484                 {
2485                         char            c2;
2486
2487                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2488                         IF_NEED_REFILL_AND_EOF_BREAK(0);
2489
2490                         /* -----
2491                          * get next character
2492                          * Note: we do not change c so if it isn't \., we can fall
2493                          * through and continue processing for client encoding.
2494                          * -----
2495                          */
2496                         c2 = copy_raw_buf[raw_buf_ptr];
2497
2498                         if (c2 == '.')
2499                         {
2500                                 raw_buf_ptr++;  /* consume the '.' */
2501
2502                                 /*
2503                                  * Note: if we loop back for more data here, it does not
2504                                  * matter that the CSV state change checks are re-executed; we
2505                                  * will come back here with no important state changed.
2506                                  */
2507                                 if (cstate->eol_type == EOL_CRNL)
2508                                 {
2509                                         /* Get the next character */
2510                                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2511                                         /* if hit_eof, c2 will become '\0' */
2512                                         c2 = copy_raw_buf[raw_buf_ptr++];
2513
2514                                         if (c2 == '\n')
2515                                         {
2516                                                 if (!cstate->csv_mode)
2517                                                         ereport(ERROR,
2518                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2519                                                                          errmsg("end-of-copy marker does not match previous newline style")));
2520                                                 else
2521                                                         NO_END_OF_COPY_GOTO;
2522                                         }
2523                                         else if (c2 != '\r')
2524                                         {
2525                                                 if (!cstate->csv_mode)
2526                                                         ereport(ERROR,
2527                                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2528                                                                          errmsg("end-of-copy marker corrupt")));
2529                                                 else
2530                                                         NO_END_OF_COPY_GOTO;
2531                                         }
2532                                 }
2533
2534                                 /* Get the next character */
2535                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
2536                                 /* if hit_eof, c2 will become '\0' */
2537                                 c2 = copy_raw_buf[raw_buf_ptr++];
2538
2539                                 if (c2 != '\r' && c2 != '\n')
2540                                 {
2541                                         if (!cstate->csv_mode)
2542                                                 ereport(ERROR,
2543                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2544                                                                  errmsg("end-of-copy marker corrupt")));
2545                                         else
2546                                                 NO_END_OF_COPY_GOTO;
2547                                 }
2548
2549                                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
2550                                         (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
2551                                         (cstate->eol_type == EOL_CR && c2 != '\r'))
2552                                 {
2553                                         ereport(ERROR,
2554                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2555                                                          errmsg("end-of-copy marker does not match previous newline style")));
2556                                 }
2557
2558                                 /*
2559                                  * Transfer only the data before the \. into line_buf, then
2560                                  * discard the data and the \. sequence.
2561                                  */
2562                                 if (prev_raw_ptr > cstate->raw_buf_index)
2563                                         appendBinaryStringInfo(&cstate->line_buf,
2564                                                                          cstate->raw_buf + cstate->raw_buf_index,
2565                                                                            prev_raw_ptr - cstate->raw_buf_index);
2566                                 cstate->raw_buf_index = raw_buf_ptr;
2567                                 result = true;  /* report EOF */
2568                                 break;
2569                         }
2570                         else if (!cstate->csv_mode)
2571
2572                                 /*
2573                                  * If we are here, it means we found a backslash followed by
2574                                  * something other than a period.  In non-CSV mode, anything
2575                                  * after a backslash is special, so we skip over that second
2576                                  * character too.  If we didn't do that \\. would be
2577                                  * considered an end-of-copy marker, while in non-CSV mode it is a
2578                                  * literal backslash followed by a period.      In CSV mode,
2579                                  * backslashes are not special, so we want to process the
2580                                  * character after the backslash just like a normal character,
2581                                  * so we don't increment in those cases.
2582                                  */
2583                                 raw_buf_ptr++;
2584                 }
2585
2586                 /*
2587                  * This label is for CSV cases where \. appears at the start of a
2588                  * line, but there is more text after it, meaning it was a data value.
2589                  * We are more strict for \. in CSV mode because \. could be a data
2590                  * value, while in non-CSV mode, \. cannot be a data value.
2591                  */
2592 not_end_of_copy:
2593
2594                 /*
2595                  * Process all bytes of a multi-byte character as a group.
2596                  *
2597                  * We only support multi-byte sequences where the first byte has the
2598                  * high-bit set, so as an optimization we can avoid this block
2599                  * entirely if it is not set.
2600                  */
2601                 if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
2602                 {
2603                         int                     mblen;
2604
2605                         mblen_str[0] = c;
2606                         /* All our encodings only read the first byte to get the length */
2607                         mblen = pg_encoding_mblen(cstate->client_encoding, mblen_str);
2608                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
2609                         IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
2610                         raw_buf_ptr += mblen - 1;
2611                 }
2612                 first_char_in_line = false;
2613         }                                                       /* end of outer loop */
2614
2615         /*
2616          * Transfer any still-uncopied data to line_buf.
2617          */
2618         REFILL_LINEBUF;
2619
2620         return result;
2621 }
2622
/*
 * GetDecimalFromHex
 *		Map one hexadecimal digit character to its numeric value.
 *
 * '0'..'9' yield 0..9.  Anything else is treated as a hex letter and is
 * folded to lower case, so both 'a' and 'A' yield 10, and so on up to 15.
 * No validation is performed here: the result is meaningless unless the
 * caller has already established that the character is a hex digit (the
 * text-format attribute parser in this file tests isxdigit() first).
 */
static int
GetDecimalFromHex(char hex)
{
	/* <ctype.h> functions need an unsigned char value, not a plain char */
	unsigned char uc = (unsigned char) hex;

	return isdigit(uc) ? (hex - '0') : (tolower(uc) - 'a' + 10);
}
2634
2635 /*
2636  * Parse the current line into separate attributes (fields),
2637  * performing de-escaping as needed.
2638  *
2639  * The input is in line_buf.  We use attribute_buf to hold the result
2640  * strings.  fieldvals[k] is set to point to the k'th attribute string,
2641  * or NULL when the input matches the null marker string.  (Note that the
2642  * caller cannot check for nulls since the returned string would be the
2643  * post-de-escaping equivalent, which may look the same as some valid data
2644  * string.)
2645  *
2646  * delim is the column delimiter string (must be just one byte for now).
2647  * null_print is the null marker string.  Note that this is compared to
2648  * the pre-de-escaped input string.
2649  *
2650  * The return value is the number of fields actually read.      (We error out
2651  * if this would exceed maxfields, which is the length of fieldvals[].)
2652  */
2653 static int
2654 CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
2655 {
2656         char            delimc = cstate->delim[0];
2657         int                     fieldno;
2658         char       *output_ptr;
2659         char       *cur_ptr;
2660         char       *line_end_ptr;
2661
2662         /*
2663          * We need a special case for zero-column tables: check that the input
2664          * line is empty, and return.
2665          */
2666         if (maxfields <= 0)
2667         {
2668                 if (cstate->line_buf.len != 0)
2669                         ereport(ERROR,
2670                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2671                                          errmsg("extra data after last expected column")));
2672                 return 0;
2673         }
2674
2675         resetStringInfo(&cstate->attribute_buf);
2676
2677         /*
2678          * The de-escaped attributes will certainly not be longer than the input
2679          * data line, so we can just force attribute_buf to be large enough and
2680          * then transfer data without any checks for enough space.      We need to do
2681          * it this way because enlarging attribute_buf mid-stream would invalidate
2682          * pointers already stored into fieldvals[].
2683          */
2684         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
2685                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
2686         output_ptr = cstate->attribute_buf.data;
2687
2688         /* set pointer variables for loop */
2689         cur_ptr = cstate->line_buf.data;
2690         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
2691
2692         /* Outer loop iterates over fields */
2693         fieldno = 0;
2694         for (;;)
2695         {
2696                 bool            found_delim = false;
2697                 char       *start_ptr;
2698                 char       *end_ptr;
2699                 int                     input_len;
2700
2701                 /* Make sure space remains in fieldvals[] */
2702                 if (fieldno >= maxfields)
2703                         ereport(ERROR,
2704                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2705                                          errmsg("extra data after last expected column")));
2706
2707                 /* Remember start of field on both input and output sides */
2708                 start_ptr = cur_ptr;
2709                 fieldvals[fieldno] = output_ptr;
2710
2711                 /* Scan data for field */
2712                 for (;;)
2713                 {
2714                         char            c;
2715
2716                         end_ptr = cur_ptr;
2717                         if (cur_ptr >= line_end_ptr)
2718                                 break;
2719                         c = *cur_ptr++;
2720                         if (c == delimc)
2721                         {
2722                                 found_delim = true;
2723                                 break;
2724                         }
2725                         if (c == '\\')
2726                         {
2727                                 if (cur_ptr >= line_end_ptr)
2728                                         break;
2729                                 c = *cur_ptr++;
2730                                 switch (c)
2731                                 {
2732                                         case '0':
2733                                         case '1':
2734                                         case '2':
2735                                         case '3':
2736                                         case '4':
2737                                         case '5':
2738                                         case '6':
2739                                         case '7':
2740                                                 {
2741                                                         /* handle \013 */
2742                                                         int                     val;
2743
2744                                                         val = OCTVALUE(c);
2745                                                         if (cur_ptr < line_end_ptr)
2746                                                         {
2747                                                                 c = *cur_ptr;
2748                                                                 if (ISOCTAL(c))
2749                                                                 {
2750                                                                         cur_ptr++;
2751                                                                         val = (val << 3) + OCTVALUE(c);
2752                                                                         if (cur_ptr < line_end_ptr)
2753                                                                         {
2754                                                                                 c = *cur_ptr;
2755                                                                                 if (ISOCTAL(c))
2756                                                                                 {
2757                                                                                         cur_ptr++;
2758                                                                                         val = (val << 3) + OCTVALUE(c);
2759                                                                                 }
2760                                                                         }
2761                                                                 }
2762                                                         }
2763                                                         c = val & 0377;
2764                                                 }
2765                                                 break;
2766                                         case 'x':
2767                                                 /* Handle \x3F */
2768                                                 if (cur_ptr < line_end_ptr)
2769                                                 {
2770                                                         char            hexchar = *cur_ptr;
2771
2772                                                         if (isxdigit((unsigned char) hexchar))
2773                                                         {
2774                                                                 int                     val = GetDecimalFromHex(hexchar);
2775
2776                                                                 cur_ptr++;
2777                                                                 if (cur_ptr < line_end_ptr)
2778                                                                 {
2779                                                                         hexchar = *cur_ptr;
2780                                                                         if (isxdigit((unsigned char) hexchar))
2781                                                                         {
2782                                                                                 cur_ptr++;
2783                                                                                 val = (val << 4) + GetDecimalFromHex(hexchar);
2784                                                                         }
2785                                                                 }
2786                                                                 c = val & 0xff;
2787                                                         }
2788                                                 }
2789                                                 break;
2790                                         case 'b':
2791                                                 c = '\b';
2792                                                 break;
2793                                         case 'f':
2794                                                 c = '\f';
2795                                                 break;
2796                                         case 'n':
2797                                                 c = '\n';
2798                                                 break;
2799                                         case 'r':
2800                                                 c = '\r';
2801                                                 break;
2802                                         case 't':
2803                                                 c = '\t';
2804                                                 break;
2805                                         case 'v':
2806                                                 c = '\v';
2807                                                 break;
2808
2809                                                 /*
2810                                                  * in all other cases, take the char after '\'
2811                                                  * literally
2812                                                  */
2813                                 }
2814                         }
2815
2816                         /* Add c to output string */
2817                         *output_ptr++ = c;
2818                 }
2819
2820                 /* Terminate attribute value in output area */
2821                 *output_ptr++ = '\0';
2822
2823                 /* Check whether raw input matched null marker */
2824                 input_len = end_ptr - start_ptr;
2825                 if (input_len == cstate->null_print_len &&
2826                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
2827                         fieldvals[fieldno] = NULL;
2828
2829                 fieldno++;
2830                 /* Done if we hit EOL instead of a delim */
2831                 if (!found_delim)
2832                         break;
2833         }
2834
2835         /* Clean up state of attribute_buf */
2836         output_ptr--;
2837         Assert(*output_ptr == '\0');
2838         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
2839
2840         return fieldno;
2841 }
2842
2843 /*
2844  * Parse the current line into separate attributes (fields),
2845  * performing de-escaping as needed.  This has exactly the same API as
2846  * CopyReadAttributesText, except we parse the fields according to
2847  * "standard" (i.e. common) CSV usage.
2848  */
2849 static int
2850 CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
2851 {
2852         char            delimc = cstate->delim[0];
2853         char            quotec = cstate->quote[0];
2854         char            escapec = cstate->escape[0];
2855         int                     fieldno;
2856         char       *output_ptr;
2857         char       *cur_ptr;
2858         char       *line_end_ptr;
2859
2860         /*
2861          * We need a special case for zero-column tables: check that the input
2862          * line is empty, and return.
2863          */
2864         if (maxfields <= 0)
2865         {
2866                 if (cstate->line_buf.len != 0)
2867                         ereport(ERROR,
2868                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2869                                          errmsg("extra data after last expected column")));
2870                 return 0;
2871         }
2872
2873         resetStringInfo(&cstate->attribute_buf);
2874
2875         /*
2876          * The de-escaped attributes will certainly not be longer than the input
2877          * data line, so we can just force attribute_buf to be large enough and
2878          * then transfer data without any checks for enough space.      We need to do
2879          * it this way because enlarging attribute_buf mid-stream would invalidate
2880          * pointers already stored into fieldvals[].
2881          */
2882         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
2883                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
2884         output_ptr = cstate->attribute_buf.data;
2885
2886         /* set pointer variables for loop */
2887         cur_ptr = cstate->line_buf.data;
2888         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
2889
2890         /* Outer loop iterates over fields */
2891         fieldno = 0;
2892         for (;;)
2893         {
2894                 bool            found_delim = false;
2895                 bool            in_quote = false;
2896                 bool            saw_quote = false;
2897                 char       *start_ptr;
2898                 char       *end_ptr;
2899                 int                     input_len;
2900
2901                 /* Make sure space remains in fieldvals[] */
2902                 if (fieldno >= maxfields)
2903                         ereport(ERROR,
2904                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2905                                          errmsg("extra data after last expected column")));
2906
2907                 /* Remember start of field on both input and output sides */
2908                 start_ptr = cur_ptr;
2909                 fieldvals[fieldno] = output_ptr;
2910
2911                 /* Scan data for field */
2912                 for (;;)
2913                 {
2914                         char            c;
2915
2916                         end_ptr = cur_ptr;
2917                         if (cur_ptr >= line_end_ptr)
2918                                 break;
2919                         c = *cur_ptr++;
2920                         /* unquoted field delimiter */
2921                         if (c == delimc && !in_quote)
2922                         {
2923                                 found_delim = true;
2924                                 break;
2925                         }
2926                         /* start of quoted field (or part of field) */
2927                         if (c == quotec && !in_quote)
2928                         {
2929                                 saw_quote = true;
2930                                 in_quote = true;
2931                                 continue;
2932                         }
2933                         /* escape within a quoted field */
2934                         if (c == escapec && in_quote)
2935                         {
2936                                 /*
2937                                  * peek at the next char if available, and escape it if it is
2938                                  * an escape char or a quote char
2939                                  */
2940                                 if (cur_ptr < line_end_ptr)
2941                                 {
2942                                         char            nextc = *cur_ptr;
2943
2944                                         if (nextc == escapec || nextc == quotec)
2945                                         {
2946                                                 *output_ptr++ = nextc;
2947                                                 cur_ptr++;
2948                                                 continue;
2949                                         }
2950                                 }
2951                         }
2952
2953                         /*
2954                          * end of quoted field. Must do this test after testing for escape
2955                          * in case quote char and escape char are the same (which is the
2956                          * common case).
2957                          */
2958                         if (c == quotec && in_quote)
2959                         {
2960                                 in_quote = false;
2961                                 continue;
2962                         }
2963
2964                         /* Add c to output string */
2965                         *output_ptr++ = c;
2966                 }
2967
2968                 /* Terminate attribute value in output area */
2969                 *output_ptr++ = '\0';
2970
2971                 /* Shouldn't still be in quote mode */
2972                 if (in_quote)
2973                         ereport(ERROR,
2974                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2975                                          errmsg("unterminated CSV quoted field")));
2976
2977                 /* Check whether raw input matched null marker */
2978                 input_len = end_ptr - start_ptr;
2979                 if (!saw_quote && input_len == cstate->null_print_len &&
2980                         strncmp(start_ptr, cstate->null_print, input_len) == 0)
2981                         fieldvals[fieldno] = NULL;
2982
2983                 fieldno++;
2984                 /* Done if we hit EOL instead of a delim */
2985                 if (!found_delim)
2986                         break;
2987         }
2988
2989         /* Clean up state of attribute_buf */
2990         output_ptr--;
2991         Assert(*output_ptr == '\0');
2992         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
2993
2994         return fieldno;
2995 }
2996
2997
2998 /*
2999  * Read a binary attribute
3000  */
3001 static Datum
3002 CopyReadBinaryAttribute(CopyState cstate,
3003                                                 int column_no, FmgrInfo *flinfo,
3004                                                 Oid typioparam, int32 typmod,
3005                                                 bool *isnull)
3006 {
3007         int32           fld_size;
3008         Datum           result;
3009
3010         if (!CopyGetInt32(cstate, &fld_size))
3011                 ereport(ERROR,
3012                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3013                                  errmsg("unexpected EOF in COPY data")));
3014         if (fld_size == -1)
3015         {
3016                 *isnull = true;
3017                 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
3018         }
3019         if (fld_size < 0)
3020                 ereport(ERROR,
3021                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3022                                  errmsg("invalid field size")));
3023
3024         /* reset attribute_buf to empty, and load raw data in it */
3025         resetStringInfo(&cstate->attribute_buf);
3026
3027         enlargeStringInfo(&cstate->attribute_buf, fld_size);
3028         if (CopyGetData(cstate, cstate->attribute_buf.data,
3029                                         fld_size, fld_size) != fld_size)
3030                 ereport(ERROR,
3031                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3032                                  errmsg("unexpected EOF in COPY data")));
3033
3034         cstate->attribute_buf.len = fld_size;
3035         cstate->attribute_buf.data[fld_size] = '\0';
3036
3037         /* Call the column type's binary input converter */
3038         result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
3039                                                                  typioparam, typmod);
3040
3041         /* Trouble if it didn't eat the whole buffer */
3042         if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
3043                 ereport(ERROR,
3044                                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
3045                                  errmsg("incorrect binary data format")));
3046
3047         *isnull = false;
3048         return result;
3049 }
3050
3051 /*
3052  * Send text representation of one attribute, with conversion and escaping
3053  */
3054 #define DUMPSOFAR() \
3055         do { \
3056                 if (ptr > start) \
3057                         CopySendData(cstate, start, ptr - start); \
3058         } while (0)
3059
3060 static void
3061 CopyAttributeOutText(CopyState cstate, char *string)
3062 {
3063         char       *ptr;
3064         char       *start;
3065         char            c;
3066         char            delimc = cstate->delim[0];
3067
3068         if (cstate->need_transcoding)
3069                 ptr = pg_server_to_client(string, strlen(string));
3070         else
3071                 ptr = string;
3072
3073         /*
3074          * We have to grovel through the string searching for control characters
3075          * and instances of the delimiter character.  In most cases, though, these
3076          * are infrequent.      To avoid overhead from calling CopySendData once per
3077          * character, we dump out all characters between replaceable characters in
3078          * a single call.  The loop invariant is that the data from "start" to
3079          * "ptr" can be sent literally, but hasn't yet been.
3080          */
3081         start = ptr;
3082         while ((c = *ptr) != '\0')
3083         {
3084                 switch (c)
3085                 {
3086                         case '\b':
3087                                 DUMPSOFAR();
3088                                 CopySendString(cstate, "\\b");
3089                                 start = ++ptr;
3090                                 break;
3091                         case '\f':
3092                                 DUMPSOFAR();
3093                                 CopySendString(cstate, "\\f");
3094                                 start = ++ptr;
3095                                 break;
3096                         case '\n':
3097                                 DUMPSOFAR();
3098                                 CopySendString(cstate, "\\n");
3099                                 start = ++ptr;
3100                                 break;
3101                         case '\r':
3102                                 DUMPSOFAR();
3103                                 CopySendString(cstate, "\\r");
3104                                 start = ++ptr;
3105                                 break;
3106                         case '\t':
3107                                 DUMPSOFAR();
3108                                 CopySendString(cstate, "\\t");
3109                                 start = ++ptr;
3110                                 break;
3111                         case '\v':
3112                                 DUMPSOFAR();
3113                                 CopySendString(cstate, "\\v");
3114                                 start = ++ptr;
3115                                 break;
3116                         case '\\':
3117                                 DUMPSOFAR();
3118                                 CopySendString(cstate, "\\\\");
3119                                 start = ++ptr;
3120                                 break;
3121                         default:
3122                                 if (c == delimc)
3123                                 {
3124                                         DUMPSOFAR();
3125                                         CopySendChar(cstate, '\\');
3126                                         start = ptr;    /* we include char in next run */
3127                                 }
3128
3129                                 /*
3130                                  * We can skip pg_encoding_mblen() overhead when encoding is
3131                                  * safe, because in valid backend encodings, extra bytes of a
3132                                  * multibyte character never look like ASCII.
3133                                  */
3134                                 if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3135                                         ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
3136                                 else
3137                                         ptr++;
3138                                 break;
3139                 }
3140         }
3141
3142         DUMPSOFAR();
3143 }
3144
3145 /*
3146  * Send text representation of one attribute, with conversion and
3147  * CSV-style escaping
3148  */
3149 static void
3150 CopyAttributeOutCSV(CopyState cstate, char *string,
3151                                         bool use_quote, bool single_attr)
3152 {
3153         char       *ptr;
3154         char       *start;
3155         char            c;
3156         char            delimc = cstate->delim[0];
3157         char            quotec = cstate->quote[0];
3158         char            escapec = cstate->escape[0];
3159
3160         /* force quoting if it matches null_print (before conversion!) */
3161         if (!use_quote && strcmp(string, cstate->null_print) == 0)
3162                 use_quote = true;
3163
3164         if (cstate->need_transcoding)
3165                 ptr = pg_server_to_client(string, strlen(string));
3166         else
3167                 ptr = string;
3168
3169         /*
3170          * Make a preliminary pass to discover if it needs quoting
3171          */
3172         if (!use_quote)
3173         {
3174                 /*
3175                  * Because '\.' can be a data value, quote it if it appears alone on a
3176                  * line so it is not interpreted as the end-of-data marker.
3177                  */
3178                 if (single_attr && strcmp(ptr, "\\.") == 0)
3179                         use_quote = true;
3180                 else
3181                 {
3182                         char       *tptr = ptr;
3183
3184                         while ((c = *tptr) != '\0')
3185                         {
3186                                 if (c == delimc || c == quotec || c == '\n' || c == '\r')
3187                                 {
3188                                         use_quote = true;
3189                                         break;
3190                                 }
3191                                 if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3192                                         tptr += pg_encoding_mblen(cstate->client_encoding, tptr);
3193                                 else
3194                                         tptr++;
3195                         }
3196                 }
3197         }
3198
3199         if (use_quote)
3200         {
3201                 CopySendChar(cstate, quotec);
3202
3203                 /*
3204                  * We adopt the same optimization strategy as in CopyAttributeOutText
3205                  */
3206                 start = ptr;
3207                 while ((c = *ptr) != '\0')
3208                 {
3209                         if (c == quotec || c == escapec)
3210                         {
3211                                 DUMPSOFAR();
3212                                 CopySendChar(cstate, escapec);
3213                                 start = ptr;    /* we include char in next run */
3214                         }
3215                         if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
3216                                 ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
3217                         else
3218                                 ptr++;
3219                 }
3220                 DUMPSOFAR();
3221
3222                 CopySendChar(cstate, quotec);
3223         }
3224         else
3225         {
3226                 /* If it doesn't need quoting, we can just dump it as-is */
3227                 CopySendString(cstate, ptr);
3228         }
3229 }
3230
3231 /*
3232  * CopyGetAttnums - build an integer list of attnums to be copied
3233  *
3234  * The input attnamelist is either the user-specified column list,
3235  * or NIL if there was none (in which case we want all the non-dropped
3236  * columns).
3237  *
3238  * rel can be NULL ... it's only used for error reports.
3239  */
3240 static List *
3241 CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
3242 {
3243         List       *attnums = NIL;
3244
3245         if (attnamelist == NIL)
3246         {
3247                 /* Generate default column list */
3248                 Form_pg_attribute *attr = tupDesc->attrs;
3249                 int                     attr_count = tupDesc->natts;
3250                 int                     i;
3251
3252                 for (i = 0; i < attr_count; i++)
3253                 {
3254                         if (attr[i]->attisdropped)
3255                                 continue;
3256                         attnums = lappend_int(attnums, i + 1);
3257                 }
3258         }
3259         else
3260         {
3261                 /* Validate the user-supplied list and extract attnums */
3262                 ListCell   *l;
3263
3264                 foreach(l, attnamelist)
3265                 {
3266                         char       *name = strVal(lfirst(l));
3267                         int                     attnum;
3268                         int                     i;
3269
3270                         /* Lookup column name */
3271                         attnum = InvalidAttrNumber;
3272                         for (i = 0; i < tupDesc->natts; i++)
3273                         {
3274                                 if (tupDesc->attrs[i]->attisdropped)
3275                                         continue;
3276                                 if (namestrcmp(&(tupDesc->attrs[i]->attname), name) == 0)
3277                                 {
3278                                         attnum = tupDesc->attrs[i]->attnum;
3279                                         break;
3280                                 }
3281                         }
3282                         if (attnum == InvalidAttrNumber)
3283                         {
3284                                 if (rel != NULL)
3285                                         ereport(ERROR,
3286                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
3287                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
3288                                                    name, RelationGetRelationName(rel))));
3289                                 else
3290                                         ereport(ERROR,
3291                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
3292                                                          errmsg("column \"%s\" does not exist",
3293                                                                         name)));
3294                         }
3295                         /* Check for duplicates */
3296                         if (list_member_int(attnums, attnum))
3297                                 ereport(ERROR,
3298                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
3299                                                  errmsg("column \"%s\" specified more than once",
3300                                                                 name)));
3301                         attnums = lappend_int(attnums, attnum);
3302                 }
3303         }
3304
3305         return attnums;
3306 }
3307
3308
3309 /*
3310  * copy_dest_startup --- executor startup
3311  */
3312 static void
3313 copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
3314 {
3315         /* no-op */
3316 }
3317
3318 /*
3319  * copy_dest_receive --- receive one tuple
3320  */
3321 static void
3322 copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
3323 {
3324         DR_copy    *myState = (DR_copy *) self;
3325         CopyState       cstate = myState->cstate;
3326
3327         /* Make sure the tuple is fully deconstructed */
3328         slot_getallattrs(slot);
3329
3330         /* And send the data */
3331         CopyOneRowTo(cstate, InvalidOid, slot->tts_values, slot->tts_isnull);
3332 }
3333
3334 /*
3335  * copy_dest_shutdown --- executor end
3336  */
3337 static void
3338 copy_dest_shutdown(DestReceiver *self)
3339 {
3340         /* no-op */
3341 }
3342
3343 /*
3344  * copy_dest_destroy --- release DestReceiver object
3345  */
3346 static void
3347 copy_dest_destroy(DestReceiver *self)
3348 {
3349         pfree(self);
3350 }
3351
3352 /*
3353  * CreateCopyDestReceiver -- create a suitable DestReceiver object
3354  */
3355 DestReceiver *
3356 CreateCopyDestReceiver(void)
3357 {
3358         DR_copy    *self = (DR_copy *) palloc(sizeof(DR_copy));
3359
3360         self->pub.receiveSlot = copy_dest_receive;
3361         self->pub.rStartup = copy_dest_startup;
3362         self->pub.rShutdown = copy_dest_shutdown;
3363         self->pub.rDestroy = copy_dest_destroy;
3364         self->pub.mydest = DestCopyOut;
3365
3366         self->cstate = NULL;            /* will be set later */
3367
3368         return (DestReceiver *) self;
3369 }