granicus.if.org Git - postgresql/blob - doc/src/sgml/ref/copy.sgml

   1 <!--
   2 doc/src/sgml/ref/copy.sgml
   3 PostgreSQL documentation
   4 -->
   5
   6
   7 <refentry id="SQL-COPY">
   8  <refmeta>
   9   <refentrytitle>COPY</refentrytitle>
  10   <manvolnum>7</manvolnum>
  11   <refmiscinfo>SQL - Language Statements</refmiscinfo>
  12  </refmeta>
  13
  14  <refnamediv>
  15   <refname>COPY</refname>
  16   <refpurpose>copy data between a file and a table</refpurpose>
  17  </refnamediv>
  18
  19  <indexterm zone="sql-copy">
  20   <primary>COPY</primary>
  21  </indexterm>
  22
  23  <refsynopsisdiv>
  24 <synopsis>
  25 COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
  26     FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN }
  27     [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
  28
  29 COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
  30     TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT }
  31     [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
  32
  33 <phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase>
  34
  35     FORMAT <replaceable class="parameter">format_name</replaceable>
  36     OIDS [ <replaceable class="parameter">boolean</replaceable> ]
  37     FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
  38     DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
  39     NULL '<replaceable class="parameter">null_string</replaceable>'
  40     HEADER [ <replaceable class="parameter">boolean</replaceable> ]
  41     QUOTE '<replaceable class="parameter">quote_character</replaceable>'
  42     ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
  43     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
  44     FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
  45     ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
  46 </synopsis>
  47  </refsynopsisdiv>
  48
  49  <refsect1>
  50   <title>Description</title>
  51
  52   <para>
  53    <command>COPY</command> moves data between
  54    <productname>PostgreSQL</productname> tables and standard file-system
  55    files. <command>COPY TO</command> copies the contents of a table
  56    <emphasis>to</> a file, while <command>COPY FROM</command> copies
  57    data <emphasis>from</> a file to a table (appending the data to
  58    whatever is in the table already).  <command>COPY TO</command>
  59    can also copy the results of a <command>SELECT</> query.
  60   </para>
  61
  62   <para>
  63    If a list of columns is specified, <command>COPY</command> will
  64    only copy the data in the specified columns to or from the file.
  65    If there are any columns in the table that are not in the column list,
  66    <command>COPY FROM</command> will insert the default values for
  67    those columns.
  68   </para>
  69
  70   <para>
  71    <command>COPY</command> with a file name instructs the
  72    <productname>PostgreSQL</productname> server to directly read from
  73    or write to a file. The file must be accessible to the server and
  74    the name must be specified from the viewpoint of the server. When
  75    <literal>PROGRAM</literal> is specified, the server executes the
  76    given command, and reads from its standard output, or writes to its
  77    standard input. The command must be specified from the viewpoint of the
  78    server, and be executable by the <literal>postgres</> user. When
  79    <literal>STDIN</literal> or <literal>STDOUT</literal> is
  80    specified, data is transmitted via the connection between the
  81    client and the server.
  82   </para>
  83  </refsect1>
  84
  85  <refsect1>
  86   <title>Parameters</title>
  87
  88   <variablelist>
  89    <varlistentry>
  90     <term><replaceable class="parameter">table_name</replaceable></term>
  91     <listitem>
  92      <para>
  93       The name (optionally schema-qualified) of an existing table.
  94      </para>
  95     </listitem>
  96    </varlistentry>
  97
  98    <varlistentry>
  99     <term><replaceable class="parameter">column_name</replaceable></term>
 100      <listitem>
 101      <para>
 102       An optional list of columns to be copied.  If no column list is
 103       specified, all columns of the table will be copied.
 104      </para>
 105     </listitem>
 106    </varlistentry>
 107
 108    <varlistentry>
 109     <term><replaceable class="parameter">query</replaceable></term>
 110     <listitem>
 111      <para>
 112       A <xref linkend="sql-select"> or
 113       <xref linkend="sql-values"> command
 114       whose results are to be copied.
 115       Note that parentheses are required around the query.
 116      </para>
 117     </listitem>
 118    </varlistentry>
 119
 120    <varlistentry>
 121     <term><replaceable class="parameter">filename</replaceable></term>
 122     <listitem>
 123      <para>
 124       The path name of the input or output file.  An input filename can be
 125       an absolute or relative path, but an output filename must be an absolute
 126       path.  Windows users might need to use an <literal>E''</> string and
 127       double any backslashes used in the path name.
 128      </para>
 129     </listitem>
 130    </varlistentry>
 131
 132    <varlistentry>
 133     <term><literal>PROGRAM</literal></term>
 134     <listitem>
 135      <para>
 136       A command to execute. In <command>COPY FROM</command>, the input is
 137       read from standard output of the command, and in <command>COPY TO</>,
 138       the output is written to the standard input of the command.
 139      </para>
 140      <para>
 141       Note that the command is invoked by the shell, so if you need to pass
 142       any arguments to the shell command that come from an untrusted source, you
 143       must be careful to strip or escape any special characters that might
 144       have a special meaning for the shell. For security reasons, it is best
 145       to use a fixed command string, or at least avoid passing any user input
 146       in it.
 147      </para>
 148     </listitem>
 149    </varlistentry>
 150
 151    <varlistentry>
 152     <term><literal>STDIN</literal></term>
 153     <listitem>
 154      <para>
 155       Specifies that input comes from the client application.
 156      </para>
 157     </listitem>
 158    </varlistentry>
 159
 160    <varlistentry>
 161     <term><literal>STDOUT</literal></term>
 162     <listitem>
 163      <para>
 164       Specifies that output goes to the client application.
 165      </para>
 166     </listitem>
 167    </varlistentry>
 168
 169    <varlistentry>
 170     <term><replaceable class="parameter">boolean</replaceable></term>
 171     <listitem>
 172      <para>
 173       Specifies whether the selected option should be turned on or off.
 174       You can write <literal>TRUE</literal>, <literal>ON</>, or
 175       <literal>1</literal> to enable the option, and <literal>FALSE</literal>,
 176       <literal>OFF</>, or <literal>0</literal> to disable it.  The
 177       <replaceable class="parameter">boolean</replaceable> value can also
 178       be omitted, in which case <literal>TRUE</literal> is assumed.
 179      </para>
 180     </listitem>
 181    </varlistentry>
 182
 183    <varlistentry>
 184     <term><literal>FORMAT</literal></term>
 185     <listitem>
 186      <para>
 187       Selects the data format to be read or written:
 188       <literal>text</>,
 189       <literal>csv</> (Comma Separated Values),
 190       or <literal>binary</>.
 191       The default is <literal>text</>.
 192      </para>
 193     </listitem>
 194    </varlistentry>
 195
 196    <varlistentry>
 197     <term><literal>OIDS</literal></term>
 198     <listitem>
 199      <para>
 200       Specifies copying the OID for each row.  (An error is raised if
 201       <literal>OIDS</literal> is specified for a table that does not
 202       have OIDs, or in the case of copying a <replaceable
 203       class="parameter">query</replaceable>.)
 204      </para>
 205     </listitem>
 206    </varlistentry>
 207
 208    <varlistentry>
 209     <term><literal>FREEZE</literal></term>
 210     <listitem>
 211      <para>
 212       Requests copying the data with rows already frozen, just as they
 213       would be after running the <command>VACUUM FREEZE</> command.
 214       This is intended as a performance option for initial data loading.
 215       Rows will be frozen only if the table being loaded has been created
 216       or truncated in the current subtransaction, there are no cursors
 217       open and there are no older snapshots held by this transaction.
 218      </para>
 219      <para>
 220       Note that all other sessions will immediately be able to see the data
 221       once it has been successfully loaded. This violates the normal rules
 222       of MVCC visibility and users specifying this option should be aware of the
 223       potential problems this might cause.
 224      </para>
 225     </listitem>
 226    </varlistentry>
 227
 228    <varlistentry>
 229     <term><literal>DELIMITER</literal></term>
 230     <listitem>
 231      <para>
 232       Specifies the character that separates columns within each row
 233       (line) of the file.  The default is a tab character in text format,
 234       a comma in <literal>CSV</> format.
 235       This must be a single one-byte character.
 236       This option is not allowed when using <literal>binary</> format.
 237      </para>
 238     </listitem>
 239    </varlistentry>
 240
 241    <varlistentry>
 242     <term><literal>NULL</literal></term>
 243     <listitem>
 244      <para>
 245       Specifies the string that represents a null value. The default is
 246       <literal>\N</literal> (backslash-N) in text format, and an unquoted empty
 247       string in <literal>CSV</> format. You might prefer an
 248       empty string even in text format for cases where you don't want to
 249       distinguish nulls from empty strings.
 250       This option is not allowed when using <literal>binary</> format.
 251      </para>
 252
 253      <note>
 254       <para>
 255        When using <command>COPY FROM</command>, any data item that matches
 256        this string will be stored as a null value, so you should make
 257        sure that you use the same string as you used with
 258        <command>COPY TO</command>.
 259       </para>
 260      </note>
 261
 262     </listitem>
 263    </varlistentry>
 264
 265    <varlistentry>
 266     <term><literal>HEADER</literal></term>
 267     <listitem>
 268      <para>
 269       Specifies that the file contains a header line with the names of each
 270       column in the file.  On output, the first line contains the column
 271       names from the table, and on input, the first line is ignored.
 272       This option is allowed only when using <literal>CSV</> format.
 273      </para>
 274     </listitem>
 275    </varlistentry>
 276
 277    <varlistentry>
 278     <term><literal>QUOTE</literal></term>
 279     <listitem>
 280      <para>
 281       Specifies the quoting character to be used when a data value is quoted.
 282       The default is double-quote.
 283       This must be a single one-byte character.
 284       This option is allowed only when using <literal>CSV</> format.
 285      </para>
 286     </listitem>
 287    </varlistentry>
 288
 289    <varlistentry>
 290     <term><literal>ESCAPE</literal></term>
 291     <listitem>
 292      <para>
 293       Specifies the character that should appear before a
 294       data character that matches the <literal>QUOTE</> value.
 295       The default is the same as the <literal>QUOTE</> value (so that
 296       the quoting character is doubled if it appears in the data).
 297       This must be a single one-byte character.
 298       This option is allowed only when using <literal>CSV</> format.
 299      </para>
 300     </listitem>
 301    </varlistentry>
 302
 303    <varlistentry>
 304     <term><literal>FORCE_QUOTE</></term>
 305     <listitem>
 306      <para>
 307       Forces quoting to be
 308       used for all non-<literal>NULL</> values in each specified column.
 309       <literal>NULL</> output is never quoted. If <literal>*</> is specified,
 310       non-<literal>NULL</> values will be quoted in all columns.
 311       This option is allowed only in <command>COPY TO</>, and only when
 312       using <literal>CSV</> format.
 313      </para>
 314     </listitem>
 315    </varlistentry>
 316
 317    <varlistentry>
 318     <term><literal>FORCE_NOT_NULL</></term>
 319     <listitem>
 320      <para>
 321       Do not match the specified columns' values against the null string.
 322       In the default case where the null string is empty, this means that
 323       empty values will be read as zero-length strings rather than nulls,
 324       even when they are not quoted.
 325       This option is allowed only in <command>COPY FROM</>, and only when
 326       using <literal>CSV</> format.
 327      </para>
 328     </listitem>
 329    </varlistentry>
 330
 331    <varlistentry>
 332     <term><literal>ENCODING</></term>
 333     <listitem>
 334      <para>
 335       Specifies that the file is encoded in the <replaceable
 336       class="parameter">encoding_name</replaceable>.  If this option is
 337       omitted, the current client encoding is used. See the Notes below
 338       for more details.
 339      </para>
 340     </listitem>
 341    </varlistentry>
 342
 343   </variablelist>
 344  </refsect1>
 345
 346  <refsect1>
 347   <title>Outputs</title>
 348
 349   <para>
 350    On successful completion, a <command>COPY</> command returns a command
 351    tag of the form
 352 <screen>
 353 COPY <replaceable class="parameter">count</replaceable>
 354 </screen>
 355    The <replaceable class="parameter">count</replaceable> is the number
 356    of rows copied.
 357   </para>
 358  </refsect1>
 359
 360  <refsect1>
 361   <title>Notes</title>
 362
 363    <para>
 364     <command>COPY</command> can only be used with plain tables, not
 365     with views.  However, you can write <literal>COPY (SELECT * FROM
 366     <replaceable class="parameter">viewname</replaceable>) TO ...</literal>.
 367    </para>
 368
 369    <para>
 370     <command>COPY</command> only deals with the specific table named;
 371     it does not copy data to or from child tables.  Thus for example
 372     <literal>COPY <replaceable class="parameter">table</> TO</literal>
 373     shows the same data as <literal>SELECT * FROM ONLY <replaceable
 374     class="parameter">table</></literal>.  But <literal>COPY
 375     (SELECT * FROM <replaceable class="parameter">table</>) TO ...</literal>
 376     can be used to dump all of the data in an inheritance hierarchy.
 377    </para>
 378
 379    <para>
 380     You must have select privilege on the table
 381     whose values are read by <command>COPY TO</command>, and
 382     insert privilege on the table into which values
 383     are inserted by <command>COPY FROM</command>.  It is sufficient
 384     to have column privileges on the column(s) listed in the command.
 385    </para>
 386
 387    <para>
 388     Files named in a <command>COPY</command> command are read or written
 389     directly by the server, not by the client application. Therefore,
 390     they must reside on or be accessible to the database server machine,
 391     not the client. They must be accessible to and readable or writable
 392     by the <productname>PostgreSQL</productname> user (the user ID the
 393     server runs as), not the client. Similarly,
 394     the command specified with <literal>PROGRAM</literal> is executed directly
 395     by the server, not by the client application, and must be executable by the
 396     <productname>PostgreSQL</productname> user.
 397     <command>COPY</command> naming a file or command is only allowed to
 398     database superusers, since it allows reading or writing any file that the
 399     server has privileges to access.
 400    </para>
 401
 402    <para>
 403     Do not confuse <command>COPY</command> with the
 404     <application>psql</application> instruction
 405     <command><link linkend="APP-PSQL-meta-commands-copy">\copy</link></command>. <command>\copy</command> invokes
 406     <command>COPY FROM STDIN</command> or <command>COPY TO
 407     STDOUT</command>, and then fetches/stores the data in a file
 408     accessible to the <application>psql</application> client. Thus,
 409     file accessibility and access rights depend on the client rather
 410     than the server when <command>\copy</command> is used.
 411    </para>
 412
 413    <para>
 414     It is recommended that the file name used in <command>COPY</command>
 415     always be specified as an absolute path. This is enforced by the
 416     server in the case of <command>COPY TO</command>, but for
 417     <command>COPY FROM</command> you do have the option of reading from
 418     a file specified by a relative path. The path will be interpreted
 419     relative to the working directory of the server process (normally
 420     the cluster's data directory), not the client's working directory.
 421    </para>
 422
 423    <para>
 424     Executing a command with <literal>PROGRAM</literal> might be restricted
 425     by the operating system's access control mechanisms, such as SELinux.
 426    </para>
 427
 428    <para>
 429     <command>COPY FROM</command> will invoke any triggers and check
 430     constraints on the destination table. However, it will not invoke rules.
 431    </para>
 432
 433    <para>
 434     <command>COPY</command> input and output is affected by
 435     <varname>DateStyle</varname>. To ensure portability to other
 436     <productname>PostgreSQL</productname> installations that might use
 437     non-default <varname>DateStyle</varname> settings,
 438     <varname>DateStyle</varname> should be set to <literal>ISO</> before
 439     using <command>COPY TO</>.  It is also a good idea to avoid dumping
 440     data with <varname>IntervalStyle</varname> set to
 441     <literal>sql_standard</>, because negative interval values might be
 442     misinterpreted by a server that has a different setting for
 443     <varname>IntervalStyle</varname>.
 444    </para>
 445
 446    <para>
 447     Input data is interpreted according to the <literal>ENCODING</literal>
 448     option or the current client encoding, and output data is encoded
 449     in <literal>ENCODING</literal> or the current client encoding, even
 450     if the data does not pass through the client but is read from or
 451     written to a file directly by the server.
 452    </para>
 453
 454    <para>
 455     <command>COPY</command> stops operation at the first error. This
 456     should not lead to problems in the event of a <command>COPY
 457     TO</command>, but the target table will already have received
 458     earlier rows in a <command>COPY FROM</command>. These rows will not
 459     be visible or accessible, but they still occupy disk space. This might
 460     amount to a considerable amount of wasted disk space if the failure
 461     happened well into a large copy operation. You might wish to invoke
 462     <command>VACUUM</command> to recover the wasted space.
 463    </para>
 464
 465  </refsect1>
 466
 467  <refsect1>
 468   <title>File Formats</title>
 469
 470   <refsect2>
 471    <title>Text Format</title>
 472
 473    <para>
 474     When the <literal>text</> format is used,
 475     the data read or written is a text file with one line per table row.
 476     Columns in a row are separated by the delimiter character.
 477     The column values themselves are strings generated by the
 478     output function, or acceptable to the input function, of each
 479     attribute's data type.  The specified null string is used in
 480     place of columns that are null.
 481     <command>COPY FROM</command> will raise an error if any line of the
 482     input file contains more or fewer columns than are expected.
 483     If <literal>OIDS</literal> is specified, the OID is read or written as the first column,
 484     preceding the user data columns.
 485    </para>
 486
 487    <para>
 488     End of data can be represented by a single line containing just
 489     backslash-period (<literal>\.</>).  An end-of-data marker is
 490     not necessary when reading from a file, since the end of file
 491     serves perfectly well; it is needed only when copying data to or from
 492     client applications using pre-3.0 client protocol.
 493    </para>
 494
 495    <para>
 496     Backslash characters (<literal>\</>) can be used in the
 497     <command>COPY</command> data to quote data characters that might
 498     otherwise be taken as row or column delimiters. In particular, the
 499     following characters <emphasis>must</> be preceded by a backslash if
 500     they appear as part of a column value: backslash itself,
 501     newline, carriage return, and the current delimiter character.
 502    </para>
 503
 504    <para>
 505     The specified null string is sent by <command>COPY TO</command> without
 506     adding any backslashes; conversely, <command>COPY FROM</command> matches
 507     the input against the null string before removing backslashes.  Therefore,
 508     a null string such as <literal>\N</literal> cannot be confused with
 509     the actual data value <literal>\N</literal> (which would be represented
 510     as <literal>\\N</literal>).
 511    </para>
 512
 513    <para>
 514     The following special backslash sequences are recognized by
 515     <command>COPY FROM</command>:
 516
 517    <informaltable>
 518     <tgroup cols="2">
 519      <thead>
 520       <row>
 521        <entry>Sequence</entry>
 522        <entry>Represents</entry>
 523       </row>
 524      </thead>
 525
 526      <tbody>
 527       <row>
 528        <entry><literal>\b</></entry>
 529        <entry>Backspace (ASCII 8)</entry>
 530       </row>
 531       <row>
 532        <entry><literal>\f</></entry>
 533        <entry>Form feed (ASCII 12)</entry>
 534       </row>
 535       <row>
 536        <entry><literal>\n</></entry>
 537        <entry>Newline (ASCII 10)</entry>
 538       </row>
 539       <row>
 540        <entry><literal>\r</></entry>
 541        <entry>Carriage return (ASCII 13)</entry>
 542       </row>
 543       <row>
 544        <entry><literal>\t</></entry>
 545        <entry>Tab (ASCII 9)</entry>
 546       </row>
 547       <row>
 548        <entry><literal>\v</></entry>
 549        <entry>Vertical tab (ASCII 11)</entry>
 550       </row>
 551       <row>
 552        <entry><literal>\</><replaceable>digits</></entry>
 553        <entry>Backslash followed by one to three octal digits specifies
 554        the character with that numeric code</entry>
 555       </row>
 556       <row>
 557        <entry><literal>\x</><replaceable>digits</></entry>
 558        <entry>Backslash <literal>x</> followed by one or two hex digits specifies
 559        the character with that numeric code</entry>
 560       </row>
 561      </tbody>
 562     </tgroup>
 563    </informaltable>
 564
 565     Presently, <command>COPY TO</command> will never emit an octal or
 566     hex-digits backslash sequence, but it does use the other sequences
 567     listed above for those control characters.
 568    </para>
 569
 570    <para>
 571     Any other backslashed character that is not mentioned in the above table
 572     will be taken to represent itself.  However, beware of adding backslashes
 573     unnecessarily, since that might accidentally produce a string matching the
 574     end-of-data marker (<literal>\.</>) or the null string (<literal>\N</> by
 575     default).  These strings will be recognized before any other backslash
 576     processing is done.
 577    </para>
 578
 579    <para>
 580     It is strongly recommended that applications generating <command>COPY</command> data convert
 581     data newlines and carriage returns to the <literal>\n</> and
 582     <literal>\r</> sequences respectively.  At present it is
 583     possible to represent a data carriage return by a backslash and carriage
 584     return, and to represent a data newline by a backslash and newline.
 585     However, these representations might not be accepted in future releases.
 586     They are also highly vulnerable to corruption if the <command>COPY</command> file is
 587     transferred across different machines (for example, from Unix to Windows
 588     or vice versa).
 589    </para>
 590
 591    <para>
 592     <command>COPY TO</command> will terminate each row with a Unix-style
 593     newline (<quote><literal>\n</></>).  Servers running on Microsoft Windows instead
 594     output carriage return/newline (<quote><literal>\r\n</></>), but only for
 595     <command>COPY</> to a server file; for consistency across platforms,
 596     <command>COPY TO STDOUT</> always sends <quote><literal>\n</></>
 597     regardless of server platform.
 598     <command>COPY FROM</command> can handle lines ending with newlines,
 599     carriage returns, or carriage return/newlines.  To reduce the risk of
 600     error due to un-backslashed newlines or carriage returns that were
 601     meant as data, <command>COPY FROM</command> will complain if the line
 602     endings in the input are not all alike.
 603    </para>
 604   </refsect2>
 605
 606   <refsect2>
 607    <title>CSV Format</title>
 608
 609    <para>
 610     This format option is used for importing and exporting the Comma
 611     Separated Value (<literal>CSV</>) file format used by many other
 612     programs, such as spreadsheets. Instead of the escaping rules used by
 613     <productname>PostgreSQL</productname>'s standard text format, it
 614     produces and recognizes the common CSV escaping mechanism.
 615    </para>
 616
 617    <para>
 618     The values in each record are separated by the <literal>DELIMITER</>
 619     character. If the value contains the delimiter character, the
 620     <literal>QUOTE</> character, the <literal>NULL</> string, a carriage
 621     return, or line feed character, then the whole value is prefixed and
 622     suffixed by the <literal>QUOTE</> character, and any occurrence
 623     within the value of a <literal>QUOTE</> character or the
 624     <literal>ESCAPE</> character is preceded by the escape character.
 625     You can also use <literal>FORCE_QUOTE</> to force quotes when outputting
 626     non-<literal>NULL</> values in specific columns.
 627    </para>
 628
 629    <para>
 630     The <literal>CSV</> format has no standard way to distinguish a
 631     <literal>NULL</> value from an empty string.
 632     <productname>PostgreSQL</>'s <command>COPY</> handles this by quoting.
 633     A <literal>NULL</> is output as the <literal>NULL</> parameter string
 634     and is not quoted, while a non-<literal>NULL</> value matching the
 635     <literal>NULL</> parameter string is quoted.  For example, with the
 636     default settings, a <literal>NULL</> is written as an unquoted empty
 637     string, while an empty string data value is written with double quotes
 638     (<literal>""</>). Reading values follows similar rules. You can
 639     use <literal>FORCE_NOT_NULL</> to prevent <literal>NULL</> input
 640     comparisons for specific columns.
 641    </para>
 642
 643    <para>
 644     Because backslash is not a special character in the <literal>CSV</>
 645     format, <literal>\.</>, the end-of-data marker, could also appear
 646     as a data value.  To avoid any misinterpretation, a <literal>\.</>
 647     data value appearing as a lone entry on a line is automatically
 648     quoted on output, and on input, if quoted, is not interpreted as the
 649     end-of-data marker.  If you are loading a file created by another
 650     application that has a single unquoted column and might have a
 651     value of <literal>\.</>, you might need to quote that value in the
 652     input file.
 653    </para>
 654
 655    <note>
 656     <para>
 657      In <literal>CSV</> format, all characters are significant. A quoted value
 658      surrounded by white space, or any characters other than
 659      <literal>DELIMITER</>, will include those characters. This can cause
 660      errors if you import data from a system that pads <literal>CSV</>
 661      lines with white space out to some fixed width. If such a situation
 662      arises you might need to preprocess the <literal>CSV</> file to remove
 663      the trailing white space, before importing the data into
 664      <productname>PostgreSQL</>.
 665     </para>
 666    </note>
 667
 668    <note>
 669     <para>
 670      CSV format will both recognize and produce CSV files with quoted
 671      values containing embedded carriage returns and line feeds. Thus
 672      the files are not strictly one line per table row like text-format
 673      files.
 674     </para>
 675    </note>
 676
 677    <note>
 678     <para>
 679      Many programs produce strange and occasionally perverse CSV files,
 680      so the file format is more a convention than a standard. Thus you
 681      might encounter some files that cannot be imported using this
 682      mechanism, and <command>COPY</> might produce files that other
 683      programs cannot process.
 684     </para>
 685    </note>
 686
 687   </refsect2>
 688
 689   <refsect2>
 690    <title>Binary Format</title>
 691
 692    <para>
 693     The <literal>binary</literal> format option causes all data to be
 694     stored/read as binary format rather than as text.  It is
 695     somewhat faster than the text and <literal>CSV</> formats,
 696     but a binary-format file is less portable across machine architectures and
 697     <productname>PostgreSQL</productname> versions.
 698     Also, the binary format is very data type specific; for example
 699     it will not work to output binary data from a <type>smallint</> column
 700     and read it into an <type>integer</> column, even though that would work
 701     fine in text format.
 702    </para>
 703
 704    <para>
 705     The <literal>binary</> file format consists
 706     of a file header, zero or more tuples containing the row data, and
 707     a file trailer.  Headers and data are in network byte order.
 708    </para>
 709
 710    <note>
 711     <para>
 712      <productname>PostgreSQL</productname> releases before 7.4 used a
 713      different binary file format.
 714     </para>
 715    </note>
 716
 717    <refsect3>
 718     <title>File Header</title>
 719
 720     <para>
 721      The file header consists of 15 bytes of fixed fields, followed
 722      by a variable-length header extension area.  The fixed fields are:
 723
 724     <variablelist>
 725      <varlistentry>
 726       <term>Signature</term>
 727       <listitem>
 728        <para>
 729 11-byte sequence <literal>PGCOPY\n\377\r\n\0</> &mdash; note that the zero byte
 730 is a required part of the signature.  (The signature is designed to allow
 731 easy identification of files that have been munged by a non-8-bit-clean
 732 transfer.  This signature will be changed by end-of-line-translation
 733 filters, dropped zero bytes, dropped high bits, or parity changes.)
 734        </para>
 735       </listitem>
 736      </varlistentry>
 737
 738      <varlistentry>
 739       <term>Flags field</term>
 740       <listitem>
 741        <para>
 742 32-bit integer bit mask to denote important aspects of the file format. Bits
 743 are numbered from 0 (<acronym>LSB</>) to 31 (<acronym>MSB</>).  Note that
 744 this field is stored in network byte order (most significant byte first),
 745 as are all the integer fields used in the file format.  Bits
 746 16-31 are reserved to denote critical file format issues; a reader
 747 should abort if it finds an unexpected bit set in this range. Bits 0-15
 748 are reserved to signal backwards-compatible format issues; a reader
 749 should simply ignore any unexpected bits set in this range. Currently
 750 only one flag bit is defined, and the rest must be zero:
 751         <variablelist>
 752          <varlistentry>
 753           <term>Bit 16</term>
 754           <listitem>
 755            <para>
 756             if 1, OIDs are included in the data; if 0, not
 757            </para>
 758           </listitem>
 759          </varlistentry>
 760         </variablelist></para>
 761       </listitem>
 762      </varlistentry>
 763
 764      <varlistentry>
 765       <term>Header extension area length</term>
 766       <listitem>
 767        <para>
 768 32-bit integer, length in bytes of the remainder of the header, not including itself.
 769 Currently, this is zero, and the first tuple follows
 770 immediately.  Future changes to the format might allow additional data
 771 to be present in the header.  A reader should silently skip over any header
 772 extension data it does not know what to do with.
 773        </para>
 774       </listitem>
 775      </varlistentry>
 776     </variablelist>
 777     </para>
 778
 779     <para>
 780 The header extension area is envisioned to contain a sequence of
 781 self-identifying chunks.  The flags field is not intended to tell readers
 782 what is in the extension area.  Specific design of header extension contents
 783 is left for a later release.
 784     </para>
 785
 786     <para>
 787      This design allows for both backwards-compatible header additions (add
 788      header extension chunks, or set low-order flag bits) and
 789      non-backwards-compatible changes (set high-order flag bits to signal such
 790      changes, and add supporting data to the extension area if needed).
 791     </para>
 792    </refsect3>
 793
 794    <refsect3>
 795     <title>Tuples</title>
 796     <para>
 797 Each tuple begins with a 16-bit integer count of the number of fields in the
 798 tuple.  (Presently, all tuples in a table will have the same count, but that
 799 might not always be true.)  Then, repeated for each field in the tuple, there
 800 is a 32-bit length word followed by that many bytes of field data.  (The
 801 length word does not include itself, and can be zero.)  As a special case,
 802 -1 indicates a NULL field value.  No value bytes follow in the NULL case.
 803     </para>
 804
 805     <para>
 806 There is no alignment padding or any other extra data between fields.
 807     </para>
 808
 809     <para>
 810 Presently, all data values in a binary-format file are
 811 assumed to be in binary format (format code one).  It is anticipated that a
 812 future extension might add a header field that allows per-column format codes
 813 to be specified.
 814     </para>
 815
 816     <para>
 817 To determine the appropriate binary format for the actual tuple data, you
 818 should consult the <productname>PostgreSQL</productname> source, in
 819 particular the <function>*send</> and <function>*recv</> functions for
 820 each column's data type (typically these functions are found in the
 821 <filename>src/backend/utils/adt/</filename> directory of the source
 822 distribution).
 823     </para>
 824
 825     <para>
 826 If OIDs are included in the file, the OID field immediately follows the
 827 field-count word.  It is a normal field except that it's not included
 828 in the field-count.  In particular it has a length word &mdash; this will allow
 829 handling of 4-byte vs. 8-byte OIDs without too much pain, and will allow
 830 OIDs to be shown as null if that ever proves desirable.
 831     </para>
 832    </refsect3>
 833
 834    <refsect3>
 835     <title>File Trailer</title>
 836
 837     <para>
 838      The file trailer consists of a 16-bit integer word containing -1.  This
 839      is easily distinguished from a tuple's field-count word.
 840     </para>
 841
 842     <para>
 843      A reader should report an error if a field-count word is neither -1
 844      nor the expected number of columns.  This provides an extra
 845      check against somehow getting out of sync with the data.
 846     </para>
 847    </refsect3>
 848   </refsect2>
 849  </refsect1>
 850
 851  <refsect1>
 852   <title>Examples</title>
 853
 854   <para>
 855    The following example copies a table to the client
 856    using the vertical bar (<literal>|</literal>) as the field delimiter:
 857 <programlisting>
 858 COPY country TO STDOUT (DELIMITER '|');
 859 </programlisting>
 860   </para>
 861
 862   <para>
 863    To copy data from a file into the <literal>country</> table:
 864 <programlisting>
 865 COPY country FROM '/usr1/proj/bray/sql/country_data';
 866 </programlisting>
 867   </para>
 868
 869   <para>
 870    To copy into a file just the countries whose names start with 'A':
 871 <programlisting>
 872 COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy';
 873 </programlisting>
 874   </para>
 875
 876   <para>
 877    To copy into a compressed file, you can pipe the output through an external
 878    compression program:
 879 <programlisting>
 880 COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz';
 881 </programlisting>
 882   </para>
 883
 884   <para>
 885    Here is a sample of data suitable for copying into a table from
 886    <literal>STDIN</literal>:
 887 <programlisting>
 888 AF      AFGHANISTAN
 889 AL      ALBANIA
 890 DZ      ALGERIA
 891 ZM      ZAMBIA
 892 ZW      ZIMBABWE
 893 </programlisting>
 894    Note that the white space on each line is actually a tab character.
 895   </para>
 896
 897   <para>
 898    The following is the same data, output in binary format.
 899    The data is shown after filtering through the
 900    Unix utility <command>od -c</command>. The table has three columns;
 901    the first has type <type>char(2)</type>, the second has type <type>text</type>,
 902    and the third has type <type>integer</type>. All the rows have a null value
 903    in the third column.
 904 <programlisting>
 905 0000000   P   G   C   O   P   Y  \n 377  \r  \n  \0  \0  \0  \0  \0  \0
 906 0000020  \0  \0  \0  \0 003  \0  \0  \0 002   A   F  \0  \0  \0 013   A
 907 0000040   F   G   H   A   N   I   S   T   A   N 377 377 377 377  \0 003
 908 0000060  \0  \0  \0 002   A   L  \0  \0  \0 007   A   L   B   A   N   I
 909 0000100   A 377 377 377 377  \0 003  \0  \0  \0 002   D   Z  \0  \0  \0
 910 0000120 007   A   L   G   E   R   I   A 377 377 377 377  \0 003  \0  \0
 911 0000140  \0 002   Z   M  \0  \0  \0 006   Z   A   M   B   I   A 377 377
 912 0000160 377 377  \0 003  \0  \0  \0 002   Z   W  \0  \0  \0  \b   Z   I
 913 0000200   M   B   A   B   W   E 377 377 377 377 377 377
 914 </programlisting></para>
 915  </refsect1>
 916
 917  <refsect1>
 918   <title>Compatibility</title>
 919
 920   <para>
 921    There is no <command>COPY</command> statement in the SQL standard.
 922   </para>
 923
 924   <para>
 925    The following syntax was used before <productname>PostgreSQL</>
 926    version 9.0 and is still supported:
 927
 928 <synopsis>
 929 COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
 930     FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
 931     [ [ WITH ]
 932           [ BINARY ]
 933           [ OIDS ]
 934           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
 935           [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
 936           [ CSV [ HEADER ]
 937                 [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
 938                 [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
 939                 [ FORCE NOT NULL <replaceable class="parameter">column_name</replaceable> [, ...] ] ] ]
 940
 941 COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
 942     TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
 943     [ [ WITH ]
 944           [ BINARY ]
 945           [ OIDS ]
 946           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
 947           [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
 948           [ CSV [ HEADER ]
 949                 [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
 950                 [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
 951                 [ FORCE QUOTE { <replaceable class="parameter">column_name</replaceable> [, ...] | * } ] ] ]
 952 </synopsis>
 953
 954    Note that in this syntax, <literal>BINARY</> and <literal>CSV</> are
 955    treated as independent keywords, not as arguments of a <literal>FORMAT</>
 956    option.
 957   </para>
 958
 959   <para>
 960    The following syntax was used before <productname>PostgreSQL</>
 961    version 7.3 and is still supported:
 962
 963 <synopsis>
 964 COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ]
 965     FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
 966     [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
 967     [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
 968
 969 COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ]
 970     TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
 971     [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
 972     [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
 973 </synopsis></para>
 974  </refsect1>
 975 </refentry>