granicus.if.org Git - postgresql/blob - doc/src/sgml/datatype.sgml

   1 <!-- doc/src/sgml/datatype.sgml -->
   2
   3  <chapter id="datatype">
   4   <title>Data Types</title>
   5
   6   <indexterm zone="datatype">
   7    <primary>data type</primary>
   8   </indexterm>
   9
  10   <indexterm>
  11    <primary>type</primary>
  12    <see>data type</see>
  13   </indexterm>
  14
  15   <para>
  16    <productname>PostgreSQL</productname> has a rich set of native data
  17    types available to users.  Users can add new types to
  18    <productname>PostgreSQL</productname> using the <xref
  19    linkend="sql-createtype"> command.
  20   </para>
  21
  22   <para>
  23    <xref linkend="datatype-table"> shows all the built-in general-purpose data
  24    types. Most of the alternative names listed in the
  25    <quote>Aliases</quote> column are the names used internally by
  26    <productname>PostgreSQL</productname> for historical reasons.  In
  27    addition, some internally used or deprecated types are available,
  28    but are not listed here.
  29   </para>
  30
  31    <table id="datatype-table">
  32     <title>Data Types</title>
  33     <tgroup cols="3">
  34      <thead>
  35       <row>
  36        <entry>Name</entry>
  37        <entry>Aliases</entry>
  38        <entry>Description</entry>
  39       </row>
  40      </thead>
  41
  42      <tbody>
  43       <row>
  44        <entry><type>bigint</type></entry>
  45        <entry><type>int8</type></entry>
  46        <entry>signed eight-byte integer</entry>
  47       </row>
  48
  49       <row>
  50        <entry><type>bigserial</type></entry>
  51        <entry><type>serial8</type></entry>
  52        <entry>autoincrementing eight-byte integer</entry>
  53       </row>
  54
  55       <row>
  56        <entry><type>bit [ (<replaceable>n</replaceable>) ]</type></entry>
  57        <entry></entry>
  58        <entry>fixed-length bit string</entry>
  59       </row>
  60
  61       <row>
  62        <entry><type>bit varying [ (<replaceable>n</replaceable>) ]</type></entry>
  63        <entry><type>varbit</type></entry>
  64        <entry>variable-length bit string</entry>
  65       </row>
  66
  67       <row>
  68        <entry><type>boolean</type></entry>
  69        <entry><type>bool</type></entry>
  70        <entry>logical Boolean (true/false)</entry>
  71       </row>
  72
  73       <row>
  74        <entry><type>box</type></entry>
  75        <entry></entry>
  76        <entry>rectangular box on a plane</entry>
  77       </row>
  78
  79       <row>
  80        <entry><type>bytea</type></entry>
  81        <entry></entry>
  82        <entry>binary data (<quote>byte array</>)</entry>
  83       </row>
  84
  85       <row>
  86        <entry><type>character varying [ (<replaceable>n</replaceable>) ]</type></entry>
  87        <entry><type>varchar [ (<replaceable>n</replaceable>) ]</type></entry>
  88        <entry>variable-length character string</entry>
  89       </row>
  90
  91       <row>
  92        <entry><type>character [ (<replaceable>n</replaceable>) ]</type></entry>
  93        <entry><type>char [ (<replaceable>n</replaceable>) ]</type></entry>
  94        <entry>fixed-length character string</entry>
  95       </row>
  96
  97       <row>
  98        <entry><type>cidr</type></entry>
  99        <entry></entry>
 100        <entry>IPv4 or IPv6 network address</entry>
 101       </row>
 102
 103       <row>
 104        <entry><type>circle</type></entry>
 105        <entry></entry>
 106        <entry>circle on a plane</entry>
 107       </row>
 108
 109       <row>
 110        <entry><type>date</type></entry>
 111        <entry></entry>
 112        <entry>calendar date (year, month, day)</entry>
 113       </row>
 114
 115       <row>
 116        <entry><type>double precision</type></entry>
 117        <entry><type>float8</type></entry>
 118        <entry>double precision floating-point number (8 bytes)</entry>
 119       </row>
 120
 121       <row>
 122        <entry><type>inet</type></entry>
 123        <entry></entry>
 124        <entry>IPv4 or IPv6 host address</entry>
 125       </row>
 126
 127       <row>
 128        <entry><type>integer</type></entry>
 129        <entry><type>int</type>, <type>int4</type></entry>
 130        <entry>signed four-byte integer</entry>
 131       </row>
 132
 133       <row>
 134        <entry><type>interval [ <replaceable>fields</replaceable> ] [ (<replaceable>p</replaceable>) ]</type></entry>
 135        <entry></entry>
 136        <entry>time span</entry>
 137       </row>
 138
 139       <row>
 140        <entry><type>line</type></entry>
 141        <entry></entry>
 142        <entry>infinite line on a plane</entry>
 143       </row>
 144
 145       <row>
 146        <entry><type>lseg</type></entry>
 147        <entry></entry>
 148        <entry>line segment on a plane</entry>
 149       </row>
 150
 151       <row>
 152        <entry><type>macaddr</type></entry>
 153        <entry></entry>
 154        <entry>MAC (Media Access Control) address</entry>
 155       </row>
 156
 157       <row>
 158        <entry><type>money</type></entry>
 159        <entry></entry>
 160        <entry>currency amount</entry>
 161       </row>
 162
 163       <row>
 164        <entry><type>numeric [ (<replaceable>p</replaceable>,
 165          <replaceable>s</replaceable>) ]</type></entry>
 166        <entry><type>decimal [ (<replaceable>p</replaceable>,
 167          <replaceable>s</replaceable>) ]</type></entry>
 168        <entry>exact numeric of selectable precision</entry>
 169       </row>
 170
 171       <row>
 172        <entry><type>path</type></entry>
 173        <entry></entry>
 174        <entry>geometric path on a plane</entry>
 175       </row>
 176
 177       <row>
 178        <entry><type>point</type></entry>
 179        <entry></entry>
 180        <entry>geometric point on a plane</entry>
 181       </row>
 182
 183       <row>
 184        <entry><type>polygon</type></entry>
 185        <entry></entry>
 186        <entry>closed geometric path on a plane</entry>
 187       </row>
 188
 189       <row>
 190        <entry><type>real</type></entry>
 191        <entry><type>float4</type></entry>
 192        <entry>single precision floating-point number (4 bytes)</entry>
 193       </row>
 194
 195       <row>
 196        <entry><type>smallint</type></entry>
 197        <entry><type>int2</type></entry>
 198        <entry>signed two-byte integer</entry>
 199       </row>
 200
 201       <row>
 202        <entry><type>smallserial</type></entry>
 203        <entry><type>serial2</type></entry>
 204        <entry>autoincrementing two-byte integer</entry>
 205       </row>
 206
 207       <row>
 208        <entry><type>serial</type></entry>
 209        <entry><type>serial4</type></entry>
 210        <entry>autoincrementing four-byte integer</entry>
 211       </row>
 212
 213       <row>
 214        <entry><type>text</type></entry>
 215        <entry></entry>
 216        <entry>variable-length character string</entry>
 217       </row>
 218
 219       <row>
 220        <entry><type>time [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
 221        <entry></entry>
 222        <entry>time of day (no time zone)</entry>
 223       </row>
 224
 225       <row>
 226        <entry><type>time [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
 227        <entry><type>timetz</type></entry>
 228        <entry>time of day, including time zone</entry>
 229       </row>
 230
 231       <row>
 232        <entry><type>timestamp [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
 233        <entry></entry>
 234        <entry>date and time (no time zone)</entry>
 235       </row>
 236
 237       <row>
 238        <entry><type>timestamp [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
 239        <entry><type>timestamptz</type></entry>
 240        <entry>date and time, including time zone</entry>
 241       </row>
 242
 243       <row>
 244        <entry><type>tsquery</type></entry>
 245        <entry></entry>
 246        <entry>text search query</entry>
 247       </row>
 248
 249       <row>
 250        <entry><type>tsvector</type></entry>
 251        <entry></entry>
 252        <entry>text search document</entry>
 253       </row>
 254
 255       <row>
 256        <entry><type>txid_snapshot</type></entry>
 257        <entry></entry>
 258        <entry>user-level transaction ID snapshot</entry>
 259       </row>
 260
 261       <row>
 262        <entry><type>uuid</type></entry>
 263        <entry></entry>
 264        <entry>universally unique identifier</entry>
 265       </row>
 266
 267       <row>
 268        <entry><type>xml</type></entry>
 269        <entry></entry>
 270        <entry>XML data</entry>
 271       </row>
 272
 273       <row>
 274        <entry><type>json</type></entry>
 275        <entry></entry>
 276        <entry>JSON data</entry>
 277       </row>
 278      </tbody>
 279     </tgroup>
 280    </table>
 281
 282   <note>
 283    <title>Compatibility</title>
 284    <para>
 285     The following types (or spellings thereof) are specified by
 286     <acronym>SQL</acronym>: <type>bigint</type>, <type>bit</type>, <type>bit
 287     varying</type>, <type>boolean</type>, <type>char</type>,
 288     <type>character varying</type>, <type>character</type>,
 289     <type>varchar</type>, <type>date</type>, <type>double
 290     precision</type>, <type>integer</type>, <type>interval</type>,
 291     <type>numeric</type>, <type>decimal</type>, <type>real</type>,
 292     <type>smallint</type>, <type>time</type> (with or without time zone),
 293     <type>timestamp</type> (with or without time zone),
 294     and <type>xml</type>.
 295    </para>
 296   </note>
 297
 298   <para>
 299    Each data type has an external representation determined by its input
 300    and output functions.  Many of the built-in types have
 301    obvious external formats.  However, several types are either unique
 302    to <productname>PostgreSQL</productname>, such as geometric
 303    paths, or have several possible formats, such as the date
 304    and time types.
 305    Some of the input and output functions are not invertible, i.e.,
 306    the result of an output function might lose accuracy when compared to
 307    the original input.
 308   </para>
 309
 310   <sect1 id="datatype-numeric">
 311    <title>Numeric Types</title>
 312
 313    <indexterm zone="datatype-numeric">
 314     <primary>data type</primary>
 315     <secondary>numeric</secondary>
 316    </indexterm>
 317
 318    <para>
 319     Numeric types consist of two-, four-, and eight-byte integers,
 320     four- and eight-byte floating-point numbers, and selectable-precision
 321     decimals.  <xref linkend="datatype-numeric-table"> lists the
 322     available types.
 323    </para>
 324
 325     <table id="datatype-numeric-table">
 326      <title>Numeric Types</title>
 327      <tgroup cols="4">
 328       <thead>
 329        <row>
 330         <entry>Name</entry>
 331         <entry>Storage Size</entry>
 332         <entry>Description</entry>
 333         <entry>Range</entry>
 334        </row>
 335       </thead>
 336
 337       <tbody>
 338        <row>
 339         <entry><type>smallint</></entry>
 340         <entry>2 bytes</entry>
 341         <entry>small-range integer</entry>
 342         <entry>-32768 to +32767</entry>
 343        </row>
 344        <row>
 345         <entry><type>integer</></entry>
 346         <entry>4 bytes</entry>
 347         <entry>typical choice for integer</entry>
 348         <entry>-2147483648 to +2147483647</entry>
 349        </row>
 350        <row>
 351         <entry><type>bigint</></entry>
 352         <entry>8 bytes</entry>
 353         <entry>large-range integer</entry>
 354         <entry>-9223372036854775808 to +9223372036854775807</entry>
 355        </row>
 356
 357        <row>
 358         <entry><type>decimal</></entry>
 359         <entry>variable</entry>
 360         <entry>user-specified precision, exact</entry>
 361         <entry>up to 131072 digits before the decimal point; up to 16383 digits after the decimal point</entry>
 362        </row>
 363        <row>
 364         <entry><type>numeric</></entry>
 365         <entry>variable</entry>
 366         <entry>user-specified precision, exact</entry>
 367         <entry>up to 131072 digits before the decimal point; up to 16383 digits after the decimal point</entry>
 368        </row>
 369
 370        <row>
 371         <entry><type>real</></entry>
 372         <entry>4 bytes</entry>
 373         <entry>variable-precision, inexact</entry>
 374         <entry>6 decimal digits precision</entry>
 375        </row>
 376        <row>
 377         <entry><type>double precision</></entry>
 378         <entry>8 bytes</entry>
 379         <entry>variable-precision, inexact</entry>
 380         <entry>15 decimal digits precision</entry>
 381        </row>
 382
 383        <row>
 384         <entry><type>smallserial</type></entry>
 385         <entry>2 bytes</entry>
 386         <entry>small autoincrementing integer</entry>
 387         <entry>1 to 32767</entry>
 388        </row>
 389
 390        <row>
 391         <entry><type>serial</></entry>
 392         <entry>4 bytes</entry>
 393         <entry>autoincrementing integer</entry>
 394         <entry>1 to 2147483647</entry>
 395        </row>
 396
 397        <row>
 398         <entry><type>bigserial</type></entry>
 399         <entry>8 bytes</entry>
 400         <entry>large autoincrementing integer</entry>
 401         <entry>1 to 9223372036854775807</entry>
 402        </row>
 403       </tbody>
 404      </tgroup>
 405     </table>
 406
 407    <para>
 408     The syntax of constants for the numeric types is described in
 409     <xref linkend="sql-syntax-constants">.  The numeric types have a
 410     full set of corresponding arithmetic operators and
 411     functions. Refer to <xref linkend="functions"> for more
 412     information.  The following sections describe the types in detail.
 413    </para>
 414
 415    <sect2 id="datatype-int">
 416     <title>Integer Types</title>
 417
 418     <indexterm zone="datatype-int">
 419      <primary>integer</primary>
 420     </indexterm>
 421
 422     <indexterm zone="datatype-int">
 423      <primary>smallint</primary>
 424     </indexterm>
 425
 426     <indexterm zone="datatype-int">
 427      <primary>bigint</primary>
 428     </indexterm>
 429
 430     <indexterm>
 431      <primary>int4</primary>
 432      <see>integer</see>
 433     </indexterm>
 434
 435     <indexterm>
 436      <primary>int2</primary>
 437      <see>smallint</see>
 438     </indexterm>
 439
 440     <indexterm>
 441      <primary>int8</primary>
 442      <see>bigint</see>
 443     </indexterm>
 444
 445     <para>
 446      The types <type>smallint</type>, <type>integer</type>, and
 447      <type>bigint</type> store whole numbers, that is, numbers without
 448      fractional components, of various ranges.  Attempts to store
 449      values outside of the allowed range will result in an error.
 450     </para>
 451
 452     <para>
 453      The type <type>integer</type> is the common choice, as it offers
 454      the best balance between range, storage size, and performance.
 455      The <type>smallint</type> type is generally only used if disk
 456      space is at a premium.  The <type>bigint</type> type is designed to be
 457      used when the range of the <type>integer</type> type is insufficient.
 458     </para>
 459
 460     <para>
 461      <acronym>SQL</acronym> only specifies the integer types
 462      <type>integer</type> (or <type>int</type>),
 463      <type>smallint</type>, and <type>bigint</type>.  The
 464      type names <type>int2</type>, <type>int4</type>, and
 465      <type>int8</type> are extensions, which are also used by some
 466      other <acronym>SQL</acronym> database systems.
 467     </para>
 468
 469    </sect2>
 470
 471    <sect2 id="datatype-numeric-decimal">
 472     <title>Arbitrary Precision Numbers</title>
 473
 474     <indexterm>
 475      <primary>numeric (data type)</primary>
 476     </indexterm>
 477
 478    <indexterm>
 479     <primary>arbitrary precision numbers</primary>
 480    </indexterm>
 481
 482     <indexterm>
 483      <primary>decimal</primary>
 484      <see>numeric</see>
 485     </indexterm>
 486
 487     <para>
 488      The type <type>numeric</type> can store numbers with a
 489      very large number of digits and perform calculations exactly. It is
 490      especially recommended for storing monetary amounts and other
 491      quantities where exactness is required. However, arithmetic on
 492      <type>numeric</type> values is very slow compared to the integer
 493      types, or to the floating-point types described in the next section.
 494     </para>
 495
 496     <para>
 497      We use the following terms below:  The
 498      <firstterm>scale</firstterm> of a <type>numeric</type> is the
 499      count of decimal digits in the fractional part, to the right of
 500      the decimal point.  The <firstterm>precision</firstterm> of a
 501      <type>numeric</type> is the total count of significant digits in
 502      the whole number, that is, the number of digits to both sides of
 503      the decimal point.  So the number 23.5141 has a precision of 6
 504      and a scale of 4.  Integers can be considered to have a scale of
 505      zero.
 506     </para>
 507
 508     <para>
 509      Both the maximum precision and the maximum scale of a
 510      <type>numeric</type> column can be
 511      configured.  To declare a column of type <type>numeric</type> use
 512      the syntax:
 513 <programlisting>
 514 NUMERIC(<replaceable>precision</replaceable>, <replaceable>scale</replaceable>)
 515 </programlisting>
 516      The precision must be positive; the scale must be zero or positive.
 517      Alternatively:
 518 <programlisting>
 519 NUMERIC(<replaceable>precision</replaceable>)
 520 </programlisting>
 521      selects a scale of 0.  Specifying:
 522 <programlisting>
 523 NUMERIC
 524 </programlisting>
 525      without any precision or scale creates a column in which numeric
 526      values of any precision and scale can be stored, up to the
 527      implementation limit on precision.  A column of this kind will
 528      not coerce input values to any particular scale, whereas
 529      <type>numeric</type> columns with a declared scale will coerce
 530      input values to that scale.  (The <acronym>SQL</acronym> standard
 531      requires a default scale of 0, i.e., coercion to integer
 532      precision.  We find this a bit useless.  If you're concerned
 533      about portability, always specify the precision and scale
 534      explicitly.)
 535     </para>
 536
 537     <note>
 538      <para>
 539       The maximum allowed precision when explicitly specified in the
 540       type declaration is 1000; <type>NUMERIC</type> without a specified
 541       precision is subject to the limits described in <xref
 542       linkend="datatype-numeric-table">.
 543      </para>
 544     </note>
 545
 546     <para>
 547      If the scale of a value to be stored is greater than the declared
 548      scale of the column, the system will round the value to the specified
 549      number of fractional digits.  Then, if the number of digits to the
 550      left of the decimal point exceeds the declared precision minus the
 551      declared scale, an error is raised.
 552     </para>
 553
 554     <para>
 555      Numeric values are physically stored without any extra leading or
 556      trailing zeroes.  Thus, the declared precision and scale of a column
 557      are maximums, not fixed allocations.  (In this sense the <type>numeric</>
 558      type is more akin to <type>varchar(<replaceable>n</>)</type>
 559      than to <type>char(<replaceable>n</>)</type>.)  The actual storage
 560      requirement is two bytes for each group of four decimal digits,
 561      plus three to eight bytes overhead.
 562     </para>
 563
 564     <indexterm>
 565      <primary>NaN</primary>
 566      <see>not a number</see>
 567    </indexterm>
 568
 569     <indexterm>
 570      <primary>not a number</primary>
 571      <secondary>numeric (data type)</secondary>
 572     </indexterm>
 573
 574     <para>
 575      In addition to ordinary numeric values, the <type>numeric</type>
 576      type allows the special value <literal>NaN</>, meaning
 577      <quote>not-a-number</quote>.  Any operation on <literal>NaN</>
 578      yields another <literal>NaN</>.  When writing this value
 579      as a constant in an SQL command, you must put quotes around it,
 580      for example <literal>UPDATE table SET x = 'NaN'</>.  On input,
 581      the string <literal>NaN</> is recognized in a case-insensitive manner.
 582     </para>
 583
 584     <note>
 585      <para>
 586       In most implementations of the <quote>not-a-number</> concept,
 587       <literal>NaN</> is not considered equal to any other numeric
 588       value (including <literal>NaN</>).  In order to allow
 589       <type>numeric</> values to be sorted and used in tree-based
 590       indexes, <productname>PostgreSQL</> treats <literal>NaN</>
 591       values as equal, and greater than all non-<literal>NaN</>
 592       values.
 593      </para>
 594     </note>
 595
 596     <para>
 597      The types <type>decimal</type> and <type>numeric</type> are
 598      equivalent.  Both types are part of the <acronym>SQL</acronym>
 599      standard.
 600     </para>
 601    </sect2>
 602
 603
 604    <sect2 id="datatype-float">
 605     <title>Floating-Point Types</title>
 606
 607     <indexterm zone="datatype-float">
 608      <primary>real</primary>
 609     </indexterm>
 610
 611     <indexterm zone="datatype-float">
 612      <primary>double precision</primary>
 613     </indexterm>
 614
 615     <indexterm>
 616      <primary>float4</primary>
 617      <see>real</see>
 618     </indexterm>
 619
 620     <indexterm>
 621      <primary>float8</primary>
 622      <see>double precision</see>
 623     </indexterm>
 624
 625     <indexterm zone="datatype-float">
 626      <primary>floating point</primary>
 627     </indexterm>
 628
 629     <para>
 630      The data types <type>real</type> and <type>double
 631      precision</type> are inexact, variable-precision numeric types.
 632      In practice, these types are usually implementations of
 633      <acronym>IEEE</acronym> Standard 754 for Binary Floating-Point
 634      Arithmetic (single and double precision, respectively), to the
 635      extent that the underlying processor, operating system, and
 636      compiler support it.
 637     </para>
 638
 639     <para>
 640      Inexact means that some values cannot be converted exactly to the
 641      internal format and are stored as approximations, so that storing
 642      and retrieving a value might show slight discrepancies.
 643      Managing these errors and how they propagate through calculations
 644      is the subject of an entire branch of mathematics and computer
 645      science and will not be discussed here, except for the
 646      following points:
 647      <itemizedlist>
 648       <listitem>
 649        <para>
 650         If you require exact storage and calculations (such as for
 651         monetary amounts), use the <type>numeric</type> type instead.
 652        </para>
 653       </listitem>
 654
 655       <listitem>
 656        <para>
 657         If you want to do complicated calculations with these types
 658         for anything important, especially if you rely on certain
 659         behavior in boundary cases (infinity, underflow), you should
 660         evaluate the implementation carefully.
 661        </para>
 662       </listitem>
 663
 664       <listitem>
 665        <para>
 666         Comparing two floating-point values for equality might not
 667         always work as expected.
 668        </para>
 669       </listitem>
 670      </itemizedlist>
 671     </para>
 672
 673     <para>
 674      On most platforms, the <type>real</type> type has a range of at least
 675      1E-37 to 1E+37 with a precision of at least 6 decimal digits.  The
 676      <type>double precision</type> type typically has a range of around
 677      1E-307 to 1E+308 with a precision of at least 15 digits.  Values that
 678      are too large or too small will cause an error.  Rounding might
 679      take place if the precision of an input number is too high.
 680      Numbers too close to zero that are not representable as distinct
 681      from zero will cause an underflow error.
 682     </para>
 683
 684     <indexterm>
 685      <primary>not a number</primary>
 686      <secondary>double precision</secondary>
 687     </indexterm>
 688
 689     <para>
 690      In addition to ordinary numeric values, the floating-point types
 691      have several special values:
 692 <literallayout>
 693 <literal>Infinity</literal>
 694 <literal>-Infinity</literal>
 695 <literal>NaN</literal>
 696 </literallayout>
 697      These represent the IEEE 754 special values
 698      <quote>infinity</quote>, <quote>negative infinity</quote>, and
 699      <quote>not-a-number</quote>, respectively.  (On a machine whose
 700      floating-point arithmetic does not follow IEEE 754, these values
 701      will probably not work as expected.)  When writing these values
 702      as constants in an SQL command, you must put quotes around them,
 703      for example <literal>UPDATE table SET x = 'Infinity'</>.  On input,
 704      these strings are recognized in a case-insensitive manner.
 705     </para>
 706
 707     <note>
 708      <para>
 709       IEEE 754 specifies that <literal>NaN</> should not compare equal
 710       to any other floating-point value (including <literal>NaN</>).
 711       In order to allow floating-point values to be sorted and used
 712       in tree-based indexes, <productname>PostgreSQL</> treats
 713       <literal>NaN</> values as equal, and greater than all
 714       non-<literal>NaN</> values.
 715      </para>
 716     </note>
 717
 718     <para>
 719      <productname>PostgreSQL</productname> also supports the SQL-standard
 720      notations <type>float</type> and
 721      <type>float(<replaceable>p</replaceable>)</type> for specifying
 722      inexact numeric types.  Here, <replaceable>p</replaceable> specifies
 723      the minimum acceptable precision in <emphasis>binary</> digits.
 724      <productname>PostgreSQL</productname> accepts
 725      <type>float(1)</type> to <type>float(24)</type> as selecting the
 726      <type>real</type> type, while
 727      <type>float(25)</type> to <type>float(53)</type> select
 728      <type>double precision</type>.  Values of <replaceable>p</replaceable>
 729      outside the allowed range draw an error.
 730      <type>float</type> with no precision specified is taken to mean
 731      <type>double precision</type>.
 732     </para>
 733
 734     <note>
 735      <para>
 736       Prior to <productname>PostgreSQL</productname> 7.4, the precision in
 737       <type>float(<replaceable>p</replaceable>)</type> was taken to mean
 738       so many <emphasis>decimal</> digits.  This has been corrected to match the SQL
 739       standard, which specifies that the precision is measured in binary
 740       digits.  The assumption that <type>real</type> and
 741       <type>double precision</type> have exactly 24 and 53 bits in the
 742       mantissa respectively is correct for IEEE-standard floating point
 743       implementations.  On non-IEEE platforms it might be off a little, but
 744       for simplicity the same ranges of <replaceable>p</replaceable> are used
 745       on all platforms.
 746      </para>
 747     </note>
 748
 749    </sect2>
 750
 751    <sect2 id="datatype-serial">
 752     <title>Serial Types</title>
 753
 754     <indexterm zone="datatype-serial">
 755      <primary>smallserial</primary>
 756     </indexterm>
 757
 758     <indexterm zone="datatype-serial">
 759      <primary>serial</primary>
 760     </indexterm>
 761
 762     <indexterm zone="datatype-serial">
 763      <primary>bigserial</primary>
 764     </indexterm>
 765
 766     <indexterm zone="datatype-serial">
 767      <primary>serial2</primary>
 768     </indexterm>
 769
 770     <indexterm zone="datatype-serial">
 771      <primary>serial4</primary>
 772     </indexterm>
 773
 774     <indexterm zone="datatype-serial">
 775      <primary>serial8</primary>
 776     </indexterm>
 777
 778     <indexterm>
 779      <primary>auto-increment</primary>
 780      <see>serial</see>
 781     </indexterm>
 782
 783     <indexterm>
 784      <primary>sequence</primary>
 785      <secondary>and serial type</secondary>
 786     </indexterm>
 787
 788     <para>
 789      The data types <type>smallserial</type>, <type>serial</type> and
 790      <type>bigserial</type> are not true types, but merely
 791      a notational convenience for creating unique identifier columns
 792      (similar to the <literal>AUTO_INCREMENT</literal> property
 793      supported by some other databases). In the current
 794      implementation, specifying:
 795
 796 <programlisting>
 797 CREATE TABLE <replaceable class="parameter">tablename</replaceable> (
 798     <replaceable class="parameter">colname</replaceable> SERIAL
 799 );
 800 </programlisting>
 801
 802      is equivalent to specifying:
 803
 804 <programlisting>
 805 CREATE SEQUENCE <replaceable class="parameter">tablename</replaceable>_<replaceable class="parameter">colname</replaceable>_seq;
 806 CREATE TABLE <replaceable class="parameter">tablename</replaceable> (
 807     <replaceable class="parameter">colname</replaceable> integer NOT NULL DEFAULT nextval('<replaceable class="parameter">tablename</replaceable>_<replaceable class="parameter">colname</replaceable>_seq')
 808 );
 809 ALTER SEQUENCE <replaceable class="parameter">tablename</replaceable>_<replaceable class="parameter">colname</replaceable>_seq OWNED BY <replaceable class="parameter">tablename</replaceable>.<replaceable class="parameter">colname</replaceable>;
 810 </programlisting>
 811
 812      Thus, we have created an integer column and arranged for its default
 813      values to be assigned from a sequence generator.  A <literal>NOT NULL</>
 814      constraint is applied to ensure that a null value cannot be
 815      inserted.  (In most cases you would also want to attach a
 816      <literal>UNIQUE</> or <literal>PRIMARY KEY</> constraint to prevent
 817      duplicate values from being inserted by accident, but this is
 818      not automatic.)  Lastly, the sequence is marked as <quote>owned by</>
 819      the column, so that it will be dropped if the column or table is dropped.
 820     </para>
 821
 822     <note>
 823       <para>
 824         Because <type>smallserial</type>, <type>serial</type> and
 825         <type>bigserial</type> are implemented using sequences, there may
 826         be <quote>holes</quote> or gaps in the sequence of values which appears in the
 827         column, even if no rows are ever deleted.  A value allocated
 828         from the sequence is still <quote>used up</quote> even if a row containing that
 829         value is never successfully inserted into the table column.  This
 830         may happen, for example, if the inserting transaction rolls back.
 831         See <literal>nextval()</literal> in <xref linkend="functions-sequence">
 832         for details.
 833       </para>
 834     </note>
 835
 836     <note>
 837      <para>
 838       Prior to <productname>PostgreSQL</productname> 7.3, <type>serial</type>
 839       implied <literal>UNIQUE</literal>.  This is no longer automatic.  If
 840       you wish a serial column to have a unique constraint or be a
 841       primary key, it must now be specified explicitly, just as for
 842       any other data type.
 843      </para>
 844     </note>
 845
 846     <para>
 847      To insert the next value of the sequence into the <type>serial</type>
 848      column, specify that the <type>serial</type>
 849      column should be assigned its default value. This can be done
 850      either by excluding the column from the list of columns in
 851      the <command>INSERT</command> statement, or through the use of
 852      the <literal>DEFAULT</literal> key word.
 853     </para>
 854
 855     <para>
 856      The type names <type>serial</type> and <type>serial4</type> are
 857      equivalent: both create <type>integer</type> columns.  The type
 858      names <type>bigserial</type> and <type>serial8</type> work
 859      the same way, except that they create a <type>bigint</type>
 860      column.  <type>bigserial</type> should be used if you anticipate
 861      the use of more than 2<superscript>31</> identifiers over the
 862      lifetime of the table. The type names <type>smallserial</type> and
 863      <type>serial2</type> also work the same way, except that they
 864      create a <type>smallint</type> column.
 865     </para>
 866
 867     <para>
 868      The sequence created for a <type>serial</type> column is
 869      automatically dropped when the owning column is dropped.
 870      You can drop the sequence without dropping the column, but this
 871      will force removal of the column default expression.
 872     </para>
 873    </sect2>
 874   </sect1>
 875
 876   <sect1 id="datatype-money">
 877    <title>Monetary Types</title>
 878
 879    <para>
 880     The <type>money</type> type stores a currency amount with a fixed
 881     fractional precision; see <xref
 882     linkend="datatype-money-table">.  The fractional precision is
 883     determined by the database's <xref linkend="guc-lc-monetary"> setting.
 884     The range shown in the table assumes there are two fractional digits.
 885     Input is accepted in a variety of formats, including integer and
 886     floating-point literals, as well as typical
 887     currency formatting, such as <literal>'$1,000.00'</literal>.
 888     Output is generally in the latter form but depends on the locale.
 889    </para>
 890
 891     <table id="datatype-money-table">
 892      <title>Monetary Types</title>
 893      <tgroup cols="4">
 894       <thead>
 895        <row>
 896         <entry>Name</entry>
 897         <entry>Storage Size</entry>
 898         <entry>Description</entry>
 899         <entry>Range</entry>
 900        </row>
 901       </thead>
 902       <tbody>
 903        <row>
 904         <entry><type>money</type></entry>
 905         <entry>8 bytes</entry>
 906         <entry>currency amount</entry>
 907         <entry>-92233720368547758.08 to +92233720368547758.07</entry>
 908        </row>
 909       </tbody>
 910      </tgroup>
 911     </table>
 912
 913    <para>
 914     Since the output of this data type is locale-sensitive, it might not
 915     work to load <type>money</> data into a database that has a different
 916     setting of <varname>lc_monetary</>.  To avoid problems, before
 917     restoring a dump into a new database make sure <varname>lc_monetary</> has
 918     the same or equivalent value as in the database that was dumped.
 919    </para>
 920
 921    <para>
 922     Values of the <type>numeric</type>, <type>int</type>, and
 923     <type>bigint</type> data types can be cast to <type>money</type>.
 924     Conversion from the <type>real</type> and <type>double precision</type>
 925     data types can be done by casting to <type>numeric</type> first, for
 926     example:
 927 <programlisting>
 928 SELECT '12.34'::float8::numeric::money;
 929 </programlisting>
 930     However, this is not recommended.  Floating-point numbers should not be
 931     used to handle money due to the potential for rounding errors.
 932    </para>
 933
 934    <para>
 935     A <type>money</type> value can be cast to <type>numeric</type> without
 936     loss of precision. Conversion to other types could potentially lose
 937     precision, and must also be done in two stages:
 938 <programlisting>
 939 SELECT '52093.89'::money::numeric::float8;
 940 </programlisting>
 941    </para>
 942
 943    <para>
 944     When a <type>money</type> value is divided by another <type>money</type>
 945     value, the result is <type>double precision</type> (i.e., a pure number,
 946     not money); the currency units cancel each other out in the division.
 947    </para>
 948   </sect1>
 949
 950
 951   <sect1 id="datatype-character">
 952    <title>Character Types</title>
 953
 954    <indexterm zone="datatype-character">
 955     <primary>character string</primary>
 956     <secondary>data types</secondary>
 957    </indexterm>
 958
 959    <indexterm>
 960     <primary>string</primary>
 961     <see>character string</see>
 962    </indexterm>
 963
 964    <indexterm zone="datatype-character">
 965     <primary>character</primary>
 966    </indexterm>
 967
 968    <indexterm zone="datatype-character">
 969     <primary>character varying</primary>
 970    </indexterm>
 971
 972    <indexterm zone="datatype-character">
 973     <primary>text</primary>
 974    </indexterm>
 975
 976    <indexterm zone="datatype-character">
 977     <primary>char</primary>
 978    </indexterm>
 979
 980    <indexterm zone="datatype-character">
 981     <primary>varchar</primary>
 982    </indexterm>
 983
 984     <table id="datatype-character-table">
 985      <title>Character Types</title>
 986      <tgroup cols="2">
 987       <thead>
 988        <row>
 989         <entry>Name</entry>
 990         <entry>Description</entry>
 991        </row>
 992       </thead>
 993       <tbody>
 994        <row>
 995         <entry><type>character varying(<replaceable>n</>)</type>, <type>varchar(<replaceable>n</>)</type></entry>
 996         <entry>variable-length with limit</entry>
 997        </row>
 998        <row>
 999         <entry><type>character(<replaceable>n</>)</type>, <type>char(<replaceable>n</>)</type></entry>
1000         <entry>fixed-length, blank padded</entry>
1001        </row>
1002        <row>
1003         <entry><type>text</type></entry>
1004         <entry>variable unlimited length</entry>
1005        </row>
1006      </tbody>
1007      </tgroup>
1008     </table>
1009
1010    <para>
1011     <xref linkend="datatype-character-table"> shows the
1012     general-purpose character types available in
1013     <productname>PostgreSQL</productname>.
1014    </para>
1015
1016    <para>
1017     <acronym>SQL</acronym> defines two primary character types:
1018     <type>character varying(<replaceable>n</>)</type> and
1019     <type>character(<replaceable>n</>)</type>, where <replaceable>n</>
1020     is a positive integer.  Both of these types can store strings up to
1021     <replaceable>n</> characters (not bytes) in length.  An attempt to store a
1022     longer string into a column of these types will result in an
1023     error, unless the excess characters are all spaces, in which case
1024     the string will be truncated to the maximum length. (This somewhat
1025     bizarre exception is required by the <acronym>SQL</acronym>
1026     standard.) If the string to be stored is shorter than the declared
1027     length, values of type <type>character</type> will be space-padded;
1028     values of type <type>character varying</type> will simply store the
1029     shorter
1030     string.
1031    </para>
1032
1033    <para>
1034     If one explicitly casts a value to <type>character
1035     varying(<replaceable>n</>)</type> or
1036     <type>character(<replaceable>n</>)</type>, then an over-length
1037     value will be truncated to <replaceable>n</> characters without
1038     raising an error. (This too is required by the
1039     <acronym>SQL</acronym> standard.)
1040    </para>
1041
1042    <para>
1043     The notations <type>varchar(<replaceable>n</>)</type> and
1044     <type>char(<replaceable>n</>)</type> are aliases for <type>character
1045     varying(<replaceable>n</>)</type> and
1046     <type>character(<replaceable>n</>)</type>, respectively.
1047     <type>character</type> without a length specifier is equivalent to
1048     <type>character(1)</type>. If <type>character varying</type> is used
1049     without a length specifier, the type accepts strings of any size. The
1050     latter is a <productname>PostgreSQL</> extension.
1051    </para>
1052
1053    <para>
1054     In addition, <productname>PostgreSQL</productname> provides the
1055     <type>text</type> type, which stores strings of any length.
1056     Although the type <type>text</type> is not in the
1057     <acronym>SQL</acronym> standard, several other SQL database
1058     management systems have it as well.
1059    </para>
1060
1061    <para>
1062     Values of type <type>character</type> are physically padded
1063     with spaces to the specified width <replaceable>n</>, and are
1064     stored and displayed that way.  However, the padding spaces are
1065     treated as semantically insignificant.  Trailing spaces are
1066     disregarded when comparing two values of type <type>character</type>,
1067     and they will be removed when converting a <type>character</type> value
1068     to one of the other string types.  Note that trailing spaces
1069     <emphasis>are</> semantically significant in
1070     <type>character varying</type> and <type>text</type> values, and
1071     when using pattern matching, that is <literal>LIKE</> and
1072     regular expressions.
1073    </para>
1074
1075    <para>
1076     The storage requirement for a short string (up to 126 bytes) is 1 byte
1077     plus the actual string, which includes the space padding in the case of
1078     <type>character</type>.  Longer strings have 4 bytes of overhead instead
1079     of 1.  Long strings are compressed by the system automatically, so
1080     the physical requirement on disk might be less. Very long values are also
1081     stored in background tables so that they do not interfere with rapid
1082     access to shorter column values. In any case, the longest
1083     possible character string that can be stored is about 1 GB. (The
1084     maximum value that will be allowed for <replaceable>n</> in the data
1085     type declaration is less than that. It wouldn't be useful to
1086     change this because with multibyte character encodings the number of
1087     characters and bytes can be quite different. If you desire to
1088     store long strings with no specific upper limit, use
1089     <type>text</type> or <type>character varying</type> without a length
1090     specifier, rather than making up an arbitrary length limit.)
1091    </para>
1092
1093    <tip>
1094     <para>
1095      There is no performance difference among these three types,
1096      apart from increased storage space when using the blank-padded
1097      type, and a few extra CPU cycles to check the length when storing into
1098      a length-constrained column.  While
1099      <type>character(<replaceable>n</>)</type> has performance
1100      advantages in some other database systems, there is no such advantage in
1101      <productname>PostgreSQL</productname>; in fact
1102      <type>character(<replaceable>n</>)</type> is usually the slowest of
1103      the three because of its additional storage costs.  In most situations
1104      <type>text</type> or <type>character varying</type> should be used
1105      instead.
1106     </para>
1107    </tip>
1108
1109    <para>
1110     Refer to <xref linkend="sql-syntax-strings"> for information about
1111     the syntax of string literals, and to <xref linkend="functions">
1112     for information about available operators and functions. The
1113     database character set determines the character set used to store
1114     textual values; for more information on character set support,
1115     refer to <xref linkend="multibyte">.
1116    </para>
1117
1118    <example>
1119     <title>Using the Character Types</title>
1120
1121 <programlisting>
1122 CREATE TABLE test1 (a character(4));
1123 INSERT INTO test1 VALUES ('ok');
1124 SELECT a, char_length(a) FROM test1; -- <co id="co.datatype-char">
1125 <computeroutput>
1126   a   | char_length
1127 ------+-------------
1128  ok   |           2
1129 </computeroutput>
1130
1131 CREATE TABLE test2 (b varchar(5));
1132 INSERT INTO test2 VALUES ('ok');
1133 INSERT INTO test2 VALUES ('good      ');
1134 INSERT INTO test2 VALUES ('too long');
1135 <computeroutput>ERROR:  value too long for type character varying(5)</computeroutput>
1136 INSERT INTO test2 VALUES ('too long'::varchar(5)); -- explicit truncation
1137 SELECT b, char_length(b) FROM test2;
1138 <computeroutput>
1139    b   | char_length
1140 -------+-------------
1141  ok    |           2
1142  good  |           5
1143  too l |           5
1144 </computeroutput>
1145 </programlisting>
1146     <calloutlist>
1147      <callout arearefs="co.datatype-char">
1148       <para>
1149        The <function>char_length</function> function is discussed in
1150        <xref linkend="functions-string">.
1151       </para>
1152      </callout>
1153     </calloutlist>
1154    </example>
1155
1156    <para>
1157     There are two other fixed-length character types in
1158     <productname>PostgreSQL</productname>, shown in <xref
1159     linkend="datatype-character-special-table">. The <type>name</type>
1160     type exists <emphasis>only</emphasis> for the storage of identifiers
1161     in the internal system catalogs and is not intended for use by the general user. Its
1162     length is currently defined as 64 bytes (63 usable characters plus
1163     terminator) but should be referenced using the constant
1164     <symbol>NAMEDATALEN</symbol> in <literal>C</> source code.
1165     The length is set at compile time (and
1166     is therefore adjustable for special uses); the default maximum
1167     length might change in a future release. The type <type>"char"</type>
1168     (note the quotes) is different from <type>char(1)</type> in that it
1169     only uses one byte of storage. It is internally used in the system
1170     catalogs as a simplistic enumeration type.
1171    </para>
1172
1173     <table id="datatype-character-special-table">
1174      <title>Special Character Types</title>
1175      <tgroup cols="3">
1176       <thead>
1177        <row>
1178         <entry>Name</entry>
1179         <entry>Storage Size</entry>
1180         <entry>Description</entry>
1181        </row>
1182       </thead>
1183       <tbody>
1184        <row>
1185         <entry><type>"char"</type></entry>
1186         <entry>1 byte</entry>
1187         <entry>single-byte internal type</entry>
1188        </row>
1189        <row>
1190         <entry><type>name</type></entry>
1191         <entry>64 bytes</entry>
1192         <entry>internal type for object names</entry>
1193        </row>
1194       </tbody>
1195      </tgroup>
1196     </table>
1197
1198   </sect1>
1199
1200  <sect1 id="datatype-binary">
1201   <title>Binary Data Types</title>
1202
1203   <indexterm zone="datatype-binary">
1204    <primary>binary data</primary>
1205   </indexterm>
1206
1207   <indexterm zone="datatype-binary">
1208    <primary>bytea</primary>
1209   </indexterm>
1210
1211    <para>
1212     The <type>bytea</type> data type allows storage of binary strings;
1213     see <xref linkend="datatype-binary-table">.
1214    </para>
1215
1216    <table id="datatype-binary-table">
1217     <title>Binary Data Types</title>
1218     <tgroup cols="3">
1219      <thead>
1220       <row>
1221        <entry>Name</entry>
1222        <entry>Storage Size</entry>
1223        <entry>Description</entry>
1224       </row>
1225      </thead>
1226      <tbody>
1227       <row>
1228        <entry><type>bytea</type></entry>
1229        <entry>1 or 4 bytes plus the actual binary string</entry>
1230        <entry>variable-length binary string</entry>
1231       </row>
1232      </tbody>
1233     </tgroup>
1234    </table>
1235
1236    <para>
1237     A binary string is a sequence of octets (or bytes).  Binary
1238     strings are distinguished from character strings in two
1239     ways.  First, binary strings specifically allow storing
1240     octets of value zero and other <quote>non-printable</quote>
1241     octets (usually, octets outside the range 32 to 126).
1242     Character strings disallow zero octets, and also disallow any
1243     other octet values and sequences of octet values that are invalid
1244     according to the database's selected character set encoding.
1245     Second, operations on binary strings process the actual bytes,
1246     whereas the processing of character strings depends on locale settings.
1247     In short, binary strings are appropriate for storing data that the
1248     programmer thinks of as <quote>raw bytes</>, whereas character
1249     strings are appropriate for storing text.
1250    </para>
1251
1252    <para>
1253     The <type>bytea</type> type supports two external formats for
1254     input and output: <productname>PostgreSQL</productname>'s historical
1255     <quote>escape</quote> format, and <quote>hex</quote> format.  Both
1256     of these are always accepted on input.  The output format depends
1257     on the configuration parameter <xref linkend="guc-bytea-output">;
1258     the default is hex.  (Note that the hex format was introduced in
1259     <productname>PostgreSQL</productname> 9.0; earlier versions and some
1260     tools don't understand it.)
1261    </para>
1262
1263    <para>
1264     The <acronym>SQL</acronym> standard defines a different binary
1265     string type, called <type>BLOB</type> or <type>BINARY LARGE
1266     OBJECT</type>.  The input format is different from
1267     <type>bytea</type>, but the provided functions and operators are
1268     mostly the same.
1269    </para>
1270
1271   <sect2>
1272    <title><type>bytea</> Hex Format</title>
1273
1274    <para>
1275     The <quote>hex</> format encodes binary data as 2 hexadecimal digits
1276     per byte, most significant nibble first.  The entire string is
1277     preceded by the sequence <literal>\x</literal> (to distinguish it
1278     from the escape format).  In some contexts, the initial backslash may
1279     need to be escaped by doubling it, in the same cases in which backslashes
1280     have to be doubled in escape format; details appear below.
1281     The hexadecimal digits can
1282     be either upper or lower case, and whitespace is permitted between
1283     digit pairs (but not within a digit pair nor in the starting
1284     <literal>\x</literal> sequence).
1285     The hex format is compatible with a wide
1286     range of external applications and protocols, and it tends to be
1287     faster to convert than the escape format, so its use is preferred.
1288    </para>
1289
1290    <para>
1291     Example:
1292 <programlisting>
1293 SELECT E'\\xDEADBEEF';
1294 </programlisting>
1295    </para>
1296   </sect2>
1297
1298   <sect2>
1299    <title><type>bytea</> Escape Format</title>
1300
1301    <para>
1302     The <quote>escape</quote> format is the traditional
1303     <productname>PostgreSQL</productname> format for the <type>bytea</type>
1304     type.  It
1305     takes the approach of representing a binary string as a sequence
1306     of ASCII characters, while converting those bytes that cannot be
1307     represented as an ASCII character into special escape sequences.
1308     If, from the point of view of the application, representing bytes
1309     as characters makes sense, then this representation can be
1310     convenient.  But in practice it is usually confusing because it
1311     fuzzes up the distinction between binary strings and character
1312     strings, and also the particular escape mechanism that was chosen is
1313     somewhat unwieldy.  So this format should probably be avoided
1314     for most new applications.
1315    </para>
1316
1317    <para>
1318     When entering <type>bytea</type> values in escape format,
1319     octets of certain
1320     values <emphasis>must</emphasis> be escaped, while all octet
1321     values <emphasis>can</emphasis> be escaped.  In
1322     general, to escape an octet, convert it into its three-digit
1323     octal value and precede it
1324     by a backslash (or two backslashes, if writing the value as a
1325     literal using escape string syntax).
1326     Backslash itself (octet value 92) can alternatively be represented by
1327     double backslashes.
1328     <xref linkend="datatype-binary-sqlesc">
1329     shows the characters that must be escaped, and gives the alternative
1330     escape sequences where applicable.
1331    </para>
1332
1333    <table id="datatype-binary-sqlesc">
1334     <title><type>bytea</> Literal Escaped Octets</title>
1335     <tgroup cols="5">
1336      <thead>
1337       <row>
1338        <entry>Decimal Octet Value</entry>
1339        <entry>Description</entry>
1340        <entry>Escaped Input Representation</entry>
1341        <entry>Example</entry>
1342        <entry>Output Representation</entry>
1343       </row>
1344      </thead>
1345
1346      <tbody>
1347       <row>
1348        <entry>0</entry>
1349        <entry>zero octet</entry>
1350        <entry><literal>E'\\000'</literal></entry>
1351        <entry><literal>SELECT E'\\000'::bytea;</literal></entry>
1352        <entry><literal>\000</literal></entry>
1353       </row>
1354
1355       <row>
1356        <entry>39</entry>
1357        <entry>single quote</entry>
1358        <entry><literal>''''</literal> or <literal>E'\\047'</literal></entry>
1359        <entry><literal>SELECT E'\''::bytea;</literal></entry>
1360        <entry><literal>'</literal></entry>
1361       </row>
1362
1363       <row>
1364        <entry>92</entry>
1365        <entry>backslash</entry>
1366        <entry><literal>E'\\\\'</literal> or <literal>E'\\134'</literal></entry>
1367        <entry><literal>SELECT E'\\\\'::bytea;</literal></entry>
1368        <entry><literal>\\</literal></entry>
1369       </row>
1370
1371       <row>
1372        <entry>0 to 31 and 127 to 255</entry>
1373        <entry><quote>non-printable</quote> octets</entry>
1374        <entry><literal>E'\\<replaceable>xxx</>'</literal> (octal value)</entry>
1375        <entry><literal>SELECT E'\\001'::bytea;</literal></entry>
1376        <entry><literal>\001</literal></entry>
1377       </row>
1378
1379      </tbody>
1380     </tgroup>
1381    </table>
1382
1383    <para>
1384     The requirement to escape <emphasis>non-printable</emphasis> octets
1385     varies depending on locale settings. In some instances you can get away
1386     with leaving them unescaped. Note that the result in each of the examples
1387     in <xref linkend="datatype-binary-sqlesc"> was exactly one octet in
1388     length, even though the output representation is sometimes
1389     more than one character.
1390    </para>
1391
1392    <para>
1393     The reason multiple backslashes are required, as shown
1394     in <xref linkend="datatype-binary-sqlesc">, is that an input
1395     string written as a string literal must pass through two parse
1396     phases in the <productname>PostgreSQL</productname> server.
1397     The first backslash of each pair is interpreted as an escape
1398     character by the string-literal parser (assuming escape string
1399     syntax is used) and is therefore consumed, leaving the second backslash of the
1400     pair.  (Dollar-quoted strings can be used to avoid this level
1401     of escaping.)  The remaining backslash is then recognized by the
1402     <type>bytea</type> input function as either starting a
1403     three-digit octal value or escaping another backslash.  For example,
1404     a string literal passed to the server as <literal>E'\\001'</literal>
1405     becomes <literal>\001</literal> after passing through the
1406     escape string parser. The <literal>\001</literal> is then sent
1407     to the <type>bytea</type> input function, where it is converted
1408     to a single octet with a decimal value of 1.  Note that the
1409     single-quote character is not treated specially by <type>bytea</type>,
1410     so it follows the normal rules for string literals.  (See also
1411     <xref linkend="sql-syntax-strings">.)
1412    </para>
1413
1414    <para>
1415     <type>Bytea</type> octets are sometimes escaped when output. In general, each
1416     <quote>non-printable</quote> octet is converted into
1417     its equivalent three-digit octal value and preceded by one backslash.
1418     Most <quote>printable</quote> octets are represented by their standard
1419     representation in the client character set. The octet with decimal
1420     value 92 (backslash) is doubled in the output.
1421     Details are in <xref linkend="datatype-binary-resesc">.
1422    </para>
1423
1424    <table id="datatype-binary-resesc">
1425     <title><type>bytea</> Output Escaped Octets</title>
1426     <tgroup cols="5">
1427      <thead>
1428       <row>
1429        <entry>Decimal Octet Value</entry>
1430        <entry>Description</entry>
1431        <entry>Escaped Output Representation</entry>
1432        <entry>Example</entry>
1433        <entry>Output Result</entry>
1434       </row>
1435      </thead>
1436
1437      <tbody>
1438
1439       <row>
1440        <entry>92</entry>
1441        <entry>backslash</entry>
1442        <entry><literal>\\</literal></entry>
1443        <entry><literal>SELECT E'\\134'::bytea;</literal></entry>
1444        <entry><literal>\\</literal></entry>
1445       </row>
1446
1447       <row>
1448        <entry>0 to 31 and 127 to 255</entry>
1449        <entry><quote>non-printable</quote> octets</entry>
1450        <entry><literal>\<replaceable>xxx</></literal> (octal value)</entry>
1451        <entry><literal>SELECT E'\\001'::bytea;</literal></entry>
1452        <entry><literal>\001</literal></entry>
1453       </row>
1454
1455       <row>
1456        <entry>32 to 126</entry>
1457        <entry><quote>printable</quote> octets</entry>
1458        <entry>client character set representation</entry>
1459        <entry><literal>SELECT E'\\176'::bytea;</literal></entry>
1460        <entry><literal>~</literal></entry>
1461       </row>
1462
1463      </tbody>
1464     </tgroup>
1465    </table>
1466
1467    <para>
1468     Depending on the front end to <productname>PostgreSQL</> you use,
1469     you might have additional work to do in terms of escaping and
1470     unescaping <type>bytea</type> strings. For example, you might also
1471     have to escape line feeds and carriage returns if your interface
1472     automatically translates these.
1473    </para>
1474   </sect2>
1475  </sect1>
1476
1477
1478   <sect1 id="datatype-datetime">
1479    <title>Date/Time Types</title>
1480
1481    <indexterm zone="datatype-datetime">
1482     <primary>date</primary>
1483    </indexterm>
1484    <indexterm zone="datatype-datetime">
1485     <primary>time</primary>
1486    </indexterm>
1487    <indexterm zone="datatype-datetime">
1488     <primary>time without time zone</primary>
1489    </indexterm>
1490    <indexterm zone="datatype-datetime">
1491     <primary>time with time zone</primary>
1492    </indexterm>
1493    <indexterm zone="datatype-datetime">
1494     <primary>timestamp</primary>
1495    </indexterm>
1496    <indexterm zone="datatype-datetime">
1497     <primary>timestamptz</primary>
1498    </indexterm>
1499    <indexterm zone="datatype-datetime">
1500     <primary>timestamp with time zone</primary>
1501    </indexterm>
1502    <indexterm zone="datatype-datetime">
1503     <primary>timestamp without time zone</primary>
1504    </indexterm>
1505    <indexterm zone="datatype-datetime">
1506     <primary>interval</primary>
1507    </indexterm>
1508    <indexterm zone="datatype-datetime">
1509     <primary>time span</primary>
1510    </indexterm>
1511
1512    <para>
1513     <productname>PostgreSQL</productname> supports the full set of
1514     <acronym>SQL</acronym> date and time types, shown in <xref
1515     linkend="datatype-datetime-table">.  The operations available
1516     on these data types are described in
1517     <xref linkend="functions-datetime">.
1518     Dates are counted according to the Gregorian calendar, even in
1519     years before that calendar was introduced (see <xref
1520     linkend="datetime-units-history"> for more information).
1521    </para>
1522
1523     <table id="datatype-datetime-table">
1524      <title>Date/Time Types</title>
1525      <tgroup cols="6">
1526       <thead>
1527        <row>
1528         <entry>Name</entry>
1529         <entry>Storage Size</entry>
1530         <entry>Description</entry>
1531         <entry>Low Value</entry>
1532         <entry>High Value</entry>
1533         <entry>Resolution</entry>
1534        </row>
1535       </thead>
1536       <tbody>
1537        <row>
1538         <entry><type>timestamp [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
1539         <entry>8 bytes</entry>
1540         <entry>both date and time (no time zone)</entry>
1541         <entry>4713 BC</entry>
1542         <entry>294276 AD</entry>
1543         <entry>1 microsecond / 14 digits</entry>
1544        </row>
1545        <row>
1546         <entry><type>timestamp [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
1547         <entry>8 bytes</entry>
1548         <entry>both date and time, with time zone</entry>
1549         <entry>4713 BC</entry>
1550         <entry>294276 AD</entry>
1551         <entry>1 microsecond / 14 digits</entry>
1552        </row>
1553        <row>
1554         <entry><type>date</type></entry>
1555         <entry>4 bytes</entry>
1556         <entry>date (no time of day)</entry>
1557         <entry>4713 BC</entry>
1558         <entry>5874897 AD</entry>
1559         <entry>1 day</entry>
1560        </row>
1561        <row>
1562         <entry><type>time [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
1563         <entry>8 bytes</entry>
1564         <entry>time of day (no date)</entry>
1565         <entry>00:00:00</entry>
1566         <entry>24:00:00</entry>
1567         <entry>1 microsecond / 14 digits</entry>
1568        </row>
1569        <row>
1570         <entry><type>time [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
1571         <entry>12 bytes</entry>
1572         <entry>time of day (no date), with time zone</entry>
1573         <entry>00:00:00+1459</entry>
1574         <entry>24:00:00-1459</entry>
1575         <entry>1 microsecond / 14 digits</entry>
1576        </row>
1577        <row>
1578         <entry><type>interval [ <replaceable>fields</replaceable> ] [ (<replaceable>p</replaceable>) ]</type></entry>
1579         <entry>12 bytes</entry>
1580         <entry>time interval</entry>
1581         <entry>-178000000 years</entry>
1582         <entry>178000000 years</entry>
1583         <entry>1 microsecond / 14 digits</entry>
1584        </row>
1585       </tbody>
1586      </tgroup>
1587     </table>
1588
1589    <note>
1590     <para>
1591      The SQL standard requires that writing just <type>timestamp</type>
1592      be equivalent to <type>timestamp without time
1593      zone</type>, and <productname>PostgreSQL</productname> honors that
1594      behavior.  (Releases prior to 7.3 treated it as <type>timestamp
1595      with time zone</type>.)  <type>timestamptz</type> is accepted as an
1596      abbreviation for <type>timestamp with time zone</type>; this is a
1597      <productname>PostgreSQL</productname> extension.
1598     </para>
1599    </note>
1600
1601    <para>
1602     <type>time</type>, <type>timestamp</type>, and
1603     <type>interval</type> accept an optional precision value
1604     <replaceable>p</replaceable> which specifies the number of
1605     fractional digits retained in the seconds field. By default, there
1606     is no explicit bound on precision.  The allowed range of
1607     <replaceable>p</replaceable> is from 0 to 6 for the
1608     <type>timestamp</type> and <type>interval</type> types.
1609    </para>
1610
1611    <note>
1612    <para>
1613     When <type>timestamp</> values are stored as eight-byte integers
1614     (currently the default), microsecond precision is available over
1615     the full range of values. When <type>timestamp</> values are
1616     stored as double precision floating-point numbers instead (a
1617     deprecated compile-time option), the effective limit of precision
1618     might be less than 6. <type>timestamp</type> values are stored as
1619     seconds before or after midnight 2000-01-01.  When
1620     <type>timestamp</type> values are implemented using floating-point
1621     numbers, microsecond precision is achieved for dates within a few
1622     years of 2000-01-01, but the precision degrades for dates further
1623     away. Note that using floating-point datetimes allows a larger
1624     range of <type>timestamp</type> values to be represented than
1625     shown above: from 4713 BC up to 5874897 AD.
1626    </para>
1627
1628    <para>
1629     The same compile-time option also determines whether
1630     <type>time</type> and <type>interval</type> values are stored as
1631     floating-point numbers or eight-byte integers.  In the
1632     floating-point case, large <type>interval</type> values degrade in
1633     precision as the size of the interval increases.
1634    </para>
1635    </note>
1636
1637    <para>
1638     For the <type>time</type> types, the allowed range of
1639     <replaceable>p</replaceable> is from 0 to 6 when eight-byte integer
1640     storage is used, or from 0 to 10 when floating-point storage is used.
1641    </para>
1642
1643    <para>
1644     The <type>interval</type> type has an additional option, which is
1645     to restrict the set of stored fields by writing one of these phrases:
1646 <literallayout class="monospaced">
1647 YEAR
1648 MONTH
1649 DAY
1650 HOUR
1651 MINUTE
1652 SECOND
1653 YEAR TO MONTH
1654 DAY TO HOUR
1655 DAY TO MINUTE
1656 DAY TO SECOND
1657 HOUR TO MINUTE
1658 HOUR TO SECOND
1659 MINUTE TO SECOND
1660 </literallayout>
1661     Note that if both <replaceable>fields</replaceable> and
1662     <replaceable>p</replaceable> are specified, the
1663     <replaceable>fields</replaceable> must include <literal>SECOND</>,
1664     since the precision applies only to the seconds.
1665    </para>
1666
1667    <para>
1668     The type <type>time with time zone</type> is defined by the SQL
1669     standard, but the definition exhibits properties which lead to
1670     questionable usefulness. In most cases, a combination of
1671     <type>date</type>, <type>time</type>, <type>timestamp without time
1672     zone</type>, and <type>timestamp with time zone</type> should
1673     provide a complete range of date/time functionality required by
1674     any application.
1675    </para>
1676
1677    <para>
1678     The types <type>abstime</type>
1679     and <type>reltime</type> are lower-precision types that are used internally.
1680     You are discouraged from using these types in
1681     applications; these internal types
1682     might disappear in a future release.
1683    </para>
1684
1685    <sect2 id="datatype-datetime-input">
1686     <title>Date/Time Input</title>
1687
1688     <para>
1689      Date and time input is accepted in almost any reasonable format, including
1690      ISO 8601, <acronym>SQL</acronym>-compatible,
1691      traditional <productname>POSTGRES</productname>, and others.
1692      For some formats, ordering of day, month, and year in date input is
1693      ambiguous and there is support for specifying the expected
1694      ordering of these fields.  Set the <xref linkend="guc-datestyle"> parameter
1695      to <literal>MDY</> to select month-day-year interpretation,
1696      <literal>DMY</> to select day-month-year interpretation, or
1697      <literal>YMD</> to select year-month-day interpretation.
1698     </para>
1699
1700     <para>
1701      <productname>PostgreSQL</productname> is more flexible in
1702      handling date/time input than the
1703      <acronym>SQL</acronym> standard requires.
1704      See <xref linkend="datetime-appendix">
1705      for the exact parsing rules of date/time input and for the
1706      recognized text fields including months, days of the week, and
1707      time zones.
1708     </para>
1709
1710     <para>
1711      Remember that any date or time literal input needs to be enclosed
1712      in single quotes, like text strings.  Refer to
1713      <xref linkend="sql-syntax-constants-generic"> for more
1714      information.
1715      <acronym>SQL</acronym> requires the following syntax
1716 <synopsis>
1717 <replaceable>type</replaceable> [ (<replaceable>p</replaceable>) ] '<replaceable>value</replaceable>'
1718 </synopsis>
1719      where <replaceable>p</replaceable> is an optional precision
1720      specification giving the number of
1721      fractional digits in the seconds field. Precision can be
1722      specified for <type>time</type>, <type>timestamp</type>, and
1723      <type>interval</type> types.  The allowed values are mentioned
1724      above.  If no precision is specified in a constant specification,
1725      it defaults to the precision of the literal value.
1726     </para>
1727
1728     <sect3>
1729     <title>Dates</title>
1730
1731     <indexterm>
1732      <primary>date</primary>
1733     </indexterm>
1734
1735     <para>
1736      <xref linkend="datatype-datetime-date-table"> shows some possible
1737      inputs for the <type>date</type> type.
1738     </para>
1739
1740      <table id="datatype-datetime-date-table">
1741       <title>Date Input</title>
1742       <tgroup cols="2">
1743        <thead>
1744         <row>
1745          <entry>Example</entry>
1746          <entry>Description</entry>
1747         </row>
1748        </thead>
1749        <tbody>
1750         <row>
1751          <entry>1999-01-08</entry>
1752          <entry>ISO 8601; January 8 in any mode
1753          (recommended format)</entry>
1754         </row>
1755         <row>
1756          <entry>January 8, 1999</entry>
1757          <entry>unambiguous in any <varname>datestyle</varname> input mode</entry>
1758         </row>
1759         <row>
1760          <entry>1/8/1999</entry>
1761          <entry>January 8 in <literal>MDY</> mode;
1762           August 1 in <literal>DMY</> mode</entry>
1763         </row>
1764         <row>
1765          <entry>1/18/1999</entry>
1766          <entry>January 18 in <literal>MDY</> mode;
1767           rejected in other modes</entry>
1768         </row>
1769         <row>
1770          <entry>01/02/03</entry>
1771          <entry>January 2, 2003 in <literal>MDY</> mode;
1772           February 1, 2003 in <literal>DMY</> mode;
1773           February 3, 2001 in <literal>YMD</> mode
1774          </entry>
1775         </row>
1776         <row>
1777          <entry>1999-Jan-08</entry>
1778          <entry>January 8 in any mode</entry>
1779         </row>
1780         <row>
1781          <entry>Jan-08-1999</entry>
1782          <entry>January 8 in any mode</entry>
1783         </row>
1784         <row>
1785          <entry>08-Jan-1999</entry>
1786          <entry>January 8 in any mode</entry>
1787         </row>
1788         <row>
1789          <entry>99-Jan-08</entry>
1790          <entry>January 8 in <literal>YMD</> mode, else error</entry>
1791         </row>
1792         <row>
1793          <entry>08-Jan-99</entry>
1794          <entry>January 8, except error in <literal>YMD</> mode</entry>
1795         </row>
1796         <row>
1797          <entry>Jan-08-99</entry>
1798          <entry>January 8, except error in <literal>YMD</> mode</entry>
1799         </row>
1800         <row>
1801          <entry>19990108</entry>
1802          <entry>ISO 8601; January 8, 1999 in any mode</entry>
1803         </row>
1804         <row>
1805          <entry>990108</entry>
1806          <entry>ISO 8601; January 8, 1999 in any mode</entry>
1807         </row>
1808         <row>
1809          <entry>1999.008</entry>
1810          <entry>year and day of year</entry>
1811         </row>
1812         <row>
1813          <entry>J2451187</entry>
1814          <entry>Julian date</entry>
1815         </row>
1816         <row>
1817          <entry>January 8, 99 BC</entry>
1818          <entry>year 99 BC</entry>
1819         </row>
1820        </tbody>
1821       </tgroup>
1822      </table>
1823     </sect3>
1824
1825     <sect3>
1826      <title>Times</title>
1827
1828      <indexterm>
1829       <primary>time</primary>
1830      </indexterm>
1831      <indexterm>
1832       <primary>time without time zone</primary>
1833      </indexterm>
1834      <indexterm>
1835       <primary>time with time zone</primary>
1836      </indexterm>
1837
1838      <para>
1839       The time-of-day types are <type>time [
1840       (<replaceable>p</replaceable>) ] without time zone</type> and
1841       <type>time [ (<replaceable>p</replaceable>) ] with time
1842       zone</type>.  <type>time</type> alone is equivalent to
1843       <type>time without time zone</type>.
1844      </para>
1845
1846      <para>
1847       Valid input for these types consists of a time of day followed
1848       by an optional time zone. (See <xref
1849       linkend="datatype-datetime-time-table">
1850       and <xref linkend="datatype-timezone-table">.)  If a time zone is
1851       specified in the input for <type>time without time zone</type>,
1852       it is silently ignored. You can also specify a date but it will
1853       be ignored, except when you use a time zone name that involves a
1854       daylight-savings rule, such as
1855       <literal>America/New_York</literal>. In this case specifying the date
1856       is required in order to determine whether standard or daylight-savings
1857       time applies.  The appropriate time zone offset is recorded in the
1858       <type>time with time zone</type> value.
1859      </para>
1860
1861       <table id="datatype-datetime-time-table">
1862        <title>Time Input</title>
1863        <tgroup cols="2">
1864         <thead>
1865          <row>
1866           <entry>Example</entry>
1867           <entry>Description</entry>
1868          </row>
1869         </thead>
1870         <tbody>
1871          <row>
1872           <entry><literal>04:05:06.789</literal></entry>
1873           <entry>ISO 8601</entry>
1874          </row>
1875          <row>
1876           <entry><literal>04:05:06</literal></entry>
1877           <entry>ISO 8601</entry>
1878          </row>
1879          <row>
1880           <entry><literal>04:05</literal></entry>
1881           <entry>ISO 8601</entry>
1882          </row>
1883          <row>
1884           <entry><literal>040506</literal></entry>
1885           <entry>ISO 8601</entry>
1886          </row>
1887          <row>
1888           <entry><literal>04:05 AM</literal></entry>
1889           <entry>same as 04:05; AM does not affect value</entry>
1890          </row>
1891          <row>
1892           <entry><literal>04:05 PM</literal></entry>
1893           <entry>same as 16:05; input hour must be &lt;= 12</entry>
1894          </row>
1895          <row>
1896           <entry><literal>04:05:06.789-8</literal></entry>
1897           <entry>ISO 8601</entry>
1898          </row>
1899          <row>
1900           <entry><literal>04:05:06-08:00</literal></entry>
1901           <entry>ISO 8601</entry>
1902          </row>
1903          <row>
1904           <entry><literal>04:05-08:00</literal></entry>
1905           <entry>ISO 8601</entry>
1906          </row>
1907          <row>
1908           <entry><literal>040506-08</literal></entry>
1909           <entry>ISO 8601</entry>
1910          </row>
1911          <row>
1912           <entry><literal>04:05:06 PST</literal></entry>
1913           <entry>time zone specified by abbreviation</entry>
1914          </row>
1915          <row>
1916           <entry><literal>2003-04-12 04:05:06 America/New_York</literal></entry>
1917           <entry>time zone specified by full name</entry>
1918          </row>
1919         </tbody>
1920        </tgroup>
1921       </table>
1922
1923       <table tocentry="1" id="datatype-timezone-table">
1924        <title>Time Zone Input</title>
1925        <tgroup cols="2">
1926         <thead>
1927          <row>
1928           <entry>Example</entry>
1929           <entry>Description</entry>
1930          </row>
1931         </thead>
1932         <tbody>
1933          <row>
1934           <entry><literal>PST</literal></entry>
1935           <entry>Abbreviation (for Pacific Standard Time)</entry>
1936          </row>
1937          <row>
1938           <entry><literal>America/New_York</literal></entry>
1939           <entry>Full time zone name</entry>
1940          </row>
1941          <row>
1942           <entry><literal>PST8PDT</literal></entry>
1943           <entry>POSIX-style time zone specification</entry>
1944          </row>
1945          <row>
1946           <entry><literal>-8:00</literal></entry>
1947           <entry>ISO-8601 offset for PST</entry>
1948          </row>
1949          <row>
1950           <entry><literal>-800</literal></entry>
1951           <entry>ISO-8601 offset for PST</entry>
1952          </row>
1953          <row>
1954           <entry><literal>-8</literal></entry>
1955           <entry>ISO-8601 offset for PST</entry>
1956          </row>
1957          <row>
1958           <entry><literal>zulu</literal></entry>
1959           <entry>Military abbreviation for UTC</entry>
1960          </row>
1961          <row>
1962           <entry><literal>z</literal></entry>
1963           <entry>Short form of <literal>zulu</literal></entry>
1964          </row>
1965         </tbody>
1966        </tgroup>
1967       </table>
1968
1969      <para>
1970      Refer to <xref linkend="datatype-timezones"> for more information on how
1971      to specify time zones.
1972     </para>
1973     </sect3>
1974
1975     <sect3>
1976     <title>Time Stamps</title>
1977
1978     <indexterm>
1979      <primary>timestamp</primary>
1980     </indexterm>
1981
1982     <indexterm>
1983      <primary>timestamp with time zone</primary>
1984     </indexterm>
1985
1986     <indexterm>
1987      <primary>timestamp without time zone</primary>
1988     </indexterm>
1989
1990      <para>
1991       Valid input for the time stamp types consists of the concatenation
1992       of a date and a time, followed by an optional time zone,
1993       followed by an optional <literal>AD</literal> or <literal>BC</literal>.
1994       (Alternatively, <literal>AD</literal>/<literal>BC</literal> can appear
1995       before the time zone, but this is not the preferred ordering.)
1996       Thus:
1997
1998 <programlisting>
1999 1999-01-08 04:05:06
2000 </programlisting>
2001       and:
2002 <programlisting>
2003 1999-01-08 04:05:06 -8:00
2004 </programlisting>
2005
2006       are valid values, which follow the <acronym>ISO</acronym> 8601
2007       standard.  In addition, the common format:
2008 <programlisting>
2009 January 8 04:05:06 1999 PST
2010 </programlisting>
2011       is supported.
2012      </para>
2013
2014      <para>
2015       The <acronym>SQL</acronym> standard differentiates
2016       <type>timestamp without time zone</type>
2017       and <type>timestamp with time zone</type> literals by the presence of a
2018       <quote>+</quote> or <quote>-</quote> symbol and time zone offset after
2019       the time.  Hence, according to the standard,
2020
2021       <programlisting>TIMESTAMP '2004-10-19 10:23:54'</programlisting>
2022
2023       is a <type>timestamp without time zone</type>, while
2024
2025       <programlisting>TIMESTAMP '2004-10-19 10:23:54+02'</programlisting>
2026
2027       is a <type>timestamp with time zone</type>.
2028       <productname>PostgreSQL</productname> never examines the content of a
2029       literal string before determining its type, and therefore will treat
2030       both of the above as <type>timestamp without time zone</type>.  To
2031       ensure that a literal is treated as <type>timestamp with time
2032       zone</type>, give it the correct explicit type:
2033
2034       <programlisting>TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'</programlisting>
2035
2036       In a literal that has been determined to be <type>timestamp without time
2037       zone</type>, <productname>PostgreSQL</productname> will silently ignore
2038       any time zone indication.
2039       That is, the resulting value is derived from the date/time
2040       fields in the input value, and is not adjusted for time zone.
2041      </para>
2042
2043      <para>
2044       For <type>timestamp with time zone</type>, the internally stored
2045       value is always in UTC (Universal
2046       Coordinated Time, traditionally known as Greenwich Mean Time,
2047       <acronym>GMT</>).  An input value that has an explicit
2048       time zone specified is converted to UTC using the appropriate offset
2049       for that time zone.  If no time zone is stated in the input string,
2050       then it is assumed to be in the time zone indicated by the system's
2051       <xref linkend="guc-timezone"> parameter, and is converted to UTC using the
2052       offset for the <varname>timezone</> zone.
2053      </para>
2054
2055      <para>
2056       When a <type>timestamp with time
2057       zone</type> value is output, it is always converted from UTC to the
2058       current <varname>timezone</> zone, and displayed as local time in that
2059       zone.  To see the time in another time zone, either change
2060       <varname>timezone</> or use the <literal>AT TIME ZONE</> construct
2061       (see <xref linkend="functions-datetime-zoneconvert">).
2062      </para>
2063
2064      <para>
2065       Conversions between <type>timestamp without time zone</type> and
2066       <type>timestamp with time zone</type> normally assume that the
2067       <type>timestamp without time zone</type> value should be taken or given
2068       as <varname>timezone</> local time.  A different time zone can
2069       be specified for the conversion using <literal>AT TIME ZONE</>.
2070      </para>
2071     </sect3>
2072
2073     <sect3>
2074      <title>Special Values</title>
2075
2076      <indexterm>
2077       <primary>time</primary>
2078       <secondary>constants</secondary>
2079      </indexterm>
2080
2081      <indexterm>
2082       <primary>date</primary>
2083       <secondary>constants</secondary>
2084      </indexterm>
2085
2086      <para>
2087       <productname>PostgreSQL</productname> supports several
2088       special date/time input values for convenience, as shown in <xref
2089       linkend="datatype-datetime-special-table">.  The values
2090       <literal>infinity</literal> and <literal>-infinity</literal>
2091       are specially represented inside the system and will be displayed
2092       unchanged; but the others are simply notational shorthands
2093       that will be converted to ordinary date/time values when read.
2094       (In particular, <literal>now</> and related strings are converted
2095       to a specific time value as soon as they are read.)
2096       All of these values need to be enclosed in single quotes when used
2097       as constants in SQL commands.
2098      </para>
2099
2100       <table id="datatype-datetime-special-table">
2101        <title>Special Date/Time Inputs</title>
2102        <tgroup cols="3">
2103         <thead>
2104          <row>
2105           <entry>Input String</entry>
2106           <entry>Valid Types</entry>
2107           <entry>Description</entry>
2108          </row>
2109         </thead>
2110         <tbody>
2111          <row>
2112           <entry><literal>epoch</literal></entry>
2113           <entry><type>date</type>, <type>timestamp</type></entry>
2114           <entry>1970-01-01 00:00:00+00 (Unix system time zero)</entry>
2115          </row>
2116          <row>
2117           <entry><literal>infinity</literal></entry>
2118           <entry><type>date</type>, <type>timestamp</type></entry>
2119           <entry>later than all other time stamps</entry>
2120          </row>
2121          <row>
2122           <entry><literal>-infinity</literal></entry>
2123           <entry><type>date</type>, <type>timestamp</type></entry>
2124           <entry>earlier than all other time stamps</entry>
2125          </row>
2126          <row>
2127           <entry><literal>now</literal></entry>
2128           <entry><type>date</type>, <type>time</type>, <type>timestamp</type></entry>
2129           <entry>current transaction's start time</entry>
2130          </row>
2131          <row>
2132           <entry><literal>today</literal></entry>
2133           <entry><type>date</type>, <type>timestamp</type></entry>
2134           <entry>midnight today</entry>
2135          </row>
2136          <row>
2137           <entry><literal>tomorrow</literal></entry>
2138           <entry><type>date</type>, <type>timestamp</type></entry>
2139           <entry>midnight tomorrow</entry>
2140          </row>
2141          <row>
2142           <entry><literal>yesterday</literal></entry>
2143           <entry><type>date</type>, <type>timestamp</type></entry>
2144           <entry>midnight yesterday</entry>
2145          </row>
2146          <row>
2147           <entry><literal>allballs</literal></entry>
2148           <entry><type>time</type></entry>
2149           <entry>00:00:00.00 UTC</entry>
2150          </row>
2151         </tbody>
2152        </tgroup>
2153       </table>
2154
2155      <para>
2156       The following <acronym>SQL</acronym>-compatible functions can also
2157       be used to obtain the current time value for the corresponding data
2158       type:
2159       <literal>CURRENT_DATE</literal>, <literal>CURRENT_TIME</literal>,
2160       <literal>CURRENT_TIMESTAMP</literal>, <literal>LOCALTIME</literal>,
2161       <literal>LOCALTIMESTAMP</literal>.  The last four accept an
2162       optional subsecond precision specification.  (See <xref
2163       linkend="functions-datetime-current">.)  Note that these are
2164       SQL functions and are <emphasis>not</> recognized in data input strings.
2165      </para>
2166
2167     </sect3>
2168    </sect2>
2169
2170    <sect2 id="datatype-datetime-output">
2171     <title>Date/Time Output</title>
2172
2173     <indexterm>
2174      <primary>date</primary>
2175      <secondary>output format</secondary>
2176      <seealso>formatting</seealso>
2177     </indexterm>
2178
2179     <indexterm>
2180      <primary>time</primary>
2181      <secondary>output format</secondary>
2182      <seealso>formatting</seealso>
2183     </indexterm>
2184
2185     <para>
2186      The output format of the date/time types can be set to one of the four
2187      styles ISO 8601,
2188      <acronym>SQL</acronym> (Ingres), traditional <productname>POSTGRES</>
2189      (Unix <application>date</> format), or
2190      German.  The default
2191      is the <acronym>ISO</acronym> format.  (The
2192      <acronym>SQL</acronym> standard requires the use of the ISO 8601
2193      format.  The name of the <quote>SQL</quote> output format is a
2194      historical accident.)  <xref
2195      linkend="datatype-datetime-output-table"> shows examples of each
2196      output style.  The output of the <type>date</type> and
2197      <type>time</type> types is of course only the date or time part
2198      in accordance with the given examples.
2199     </para>
2200
2201      <table id="datatype-datetime-output-table">
2202       <title>Date/Time Output Styles</title>
2203       <tgroup cols="3">
2204        <thead>
2205         <row>
2206          <entry>Style Specification</entry>
2207          <entry>Description</entry>
2208          <entry>Example</entry>
2209         </row>
2210        </thead>
2211        <tbody>
2212         <row>
2213          <entry><literal>ISO</literal></entry>
2214          <entry>ISO 8601, SQL standard</entry>
2215          <entry><literal>1997-12-17 07:37:16-08</literal></entry>
2216         </row>
2217         <row>
2218          <entry><literal>SQL</literal></entry>
2219          <entry>traditional style</entry>
2220          <entry><literal>12/17/1997 07:37:16.00 PST</literal></entry>
2221         </row>
2222         <row>
2223          <entry><literal>Postgres</literal></entry>
2224          <entry>original style</entry>
2225          <entry><literal>Wed Dec 17 07:37:16 1997 PST</literal></entry>
2226         </row>
2227         <row>
2228          <entry><literal>German</literal></entry>
2229          <entry>regional style</entry>
2230          <entry><literal>17.12.1997 07:37:16.00 PST</literal></entry>
2231         </row>
2232        </tbody>
2233       </tgroup>
2234      </table>
2235
2236     <note>
2237      <para>
2238       ISO 8601 specifies the use of uppercase letter <literal>T</> to separate
2239       the date and time.  <productname>PostgreSQL</> accepts that format on
2240       input, but on output it uses a space rather than <literal>T</>, as shown
2241       above.  This is for readability and for consistency with RFC 3339 as
2242       well as some other database systems.
2243      </para>
2244     </note>
2245
2246     <para>
2247      In the <acronym>SQL</acronym> and POSTGRES styles, day appears before
2248      month if DMY field ordering has been specified; otherwise month appears
2249      before day.
2250      (See <xref linkend="datatype-datetime-input">
2251      for how this setting also affects interpretation of input values.)
2252      <xref linkend="datatype-datetime-output2-table"> shows examples.
2253     </para>
2254
2255      <table id="datatype-datetime-output2-table">
2256       <title>Date Order Conventions</title>
2257       <tgroup cols="3">
2258        <thead>
2259         <row>
2260          <entry><varname>datestyle</varname> Setting</entry>
2261          <entry>Input Ordering</entry>
2262          <entry>Example Output</entry>
2263         </row>
2264        </thead>
2265        <tbody>
2266         <row>
2267          <entry><literal>SQL, DMY</></entry>
2268          <entry><replaceable>day</replaceable>/<replaceable>month</replaceable>/<replaceable>year</replaceable></entry>
2269          <entry><literal>17/12/1997 15:37:16.00 CET</literal></entry>
2270         </row>
2271         <row>
2272          <entry><literal>SQL, MDY</></entry>
2273          <entry><replaceable>month</replaceable>/<replaceable>day</replaceable>/<replaceable>year</replaceable></entry>
2274          <entry><literal>12/17/1997 07:37:16.00 PST</literal></entry>
2275         </row>
2276         <row>
2277          <entry><literal>Postgres, DMY</></entry>
2278          <entry><replaceable>day</replaceable>/<replaceable>month</replaceable>/<replaceable>year</replaceable></entry>
2279          <entry><literal>Wed 17 Dec 07:37:16 1997 PST</literal></entry>
2280         </row>
2281        </tbody>
2282       </tgroup>
2283      </table>
2284
2285     <para>
2286      The date/time style can be selected by the user using the
2287      <command>SET datestyle</command> command, the <xref
2288      linkend="guc-datestyle"> parameter in the
2289      <filename>postgresql.conf</filename> configuration file, or the
2290      <envar>PGDATESTYLE</envar> environment variable on the server or
2291      client.
2292     </para>
2293
2294     <para>
2295      The formatting function <function>to_char</function>
2296      (see <xref linkend="functions-formatting">) is also available as
2297      a more flexible way to format date/time output.
2298     </para>
2299    </sect2>
2300
2301    <sect2 id="datatype-timezones">
2302     <title>Time Zones</title>
2303
2304     <indexterm zone="datatype-timezones">
2305      <primary>time zone</primary>
2306     </indexterm>
2307
2308    <para>
2309     Time zones, and time-zone conventions, are influenced by
2310     political decisions, not just earth geometry. Time zones around the
2311     world became somewhat standardized during the 1900s,
2312     but continue to be prone to arbitrary changes, particularly with
2313     respect to daylight-savings rules.
2314     <productname>PostgreSQL</productname> uses the widely used
2315     <literal>zoneinfo</> (Olson) time zone database for information about
2316     historical time zone rules.  For times in the future, the assumption
2317     is that the latest known rules for a given time zone will
2318     continue to be observed indefinitely far into the future.
2319    </para>
2320
2321     <para>
2322      <productname>PostgreSQL</productname> endeavors to be compatible with
2323      the <acronym>SQL</acronym> standard definitions for typical usage.
2324      However, the <acronym>SQL</acronym> standard has an odd mix of date and
2325      time types and capabilities. Two obvious problems are:
2326
2327      <itemizedlist>
2328       <listitem>
2329        <para>
2330         Although the <type>date</type> type
2331         cannot have an associated time zone, the
2332         <type>time</type> type can.
2333         Time zones in the real world have little meaning unless
2334         associated with a date as well as a time,
2335         since the offset can vary through the year with daylight-saving
2336         time boundaries.
2337        </para>
2338       </listitem>
2339
2340       <listitem>
2341        <para>
2342         The default time zone is specified as a constant numeric offset
2343         from <acronym>UTC</>. It is therefore impossible to adapt to
2344         daylight-saving time when doing date/time arithmetic across
2345         <acronym>DST</acronym> boundaries.
2346        </para>
2347       </listitem>
2348
2349      </itemizedlist>
2350     </para>
2351
2352     <para>
2353      To address these difficulties, we recommend using date/time types
2354      that contain both date and time when using time zones. We
2355      do <emphasis>not</> recommend using the type <type>time with
2356      time zone</type> (though it is supported by
2357      <productname>PostgreSQL</productname> for legacy applications and
2358      for compliance with the <acronym>SQL</acronym> standard).
2359      <productname>PostgreSQL</productname> assumes
2360      your local time zone for any type containing only date or time.
2361     </para>
2362
2363     <para>
2364      All timezone-aware dates and times are stored internally in
2365      <acronym>UTC</acronym>.  They are converted to local time
2366      in the zone specified by the <xref linkend="guc-timezone"> configuration
2367      parameter before being displayed to the client.
2368     </para>
2369
2370     <para>
2371      <productname>PostgreSQL</productname> allows you to specify time zones in
2372      three different forms:
2373      <itemizedlist>
2374       <listitem>
2375        <para>
2376         A full time zone name, for example <literal>America/New_York</>.
2377         The recognized time zone names are listed in the
2378         <literal>pg_timezone_names</literal> view (see <xref
2379         linkend="view-pg-timezone-names">).
2380         <productname>PostgreSQL</productname> uses the widely used
2381         <literal>zoneinfo</> time zone data for this purpose, so the same
2382         names are also recognized by many other software packages.
2383        </para>
2384       </listitem>
2385       <listitem>
2386        <para>
2387         A time zone abbreviation, for example <literal>PST</>.  Such a
2388         specification merely defines a particular offset from UTC, in
2389         contrast to full time zone names which can imply a set of daylight
2390         savings transition-date rules as well.  The recognized abbreviations
2391         are listed in the <literal>pg_timezone_abbrevs</> view (see <xref
2392         linkend="view-pg-timezone-abbrevs">).  You cannot set the
2393         configuration parameters <xref linkend="guc-timezone"> or
2394         <xref linkend="guc-log-timezone"> to a time
2395         zone abbreviation, but you can use abbreviations in
2396         date/time input values and with the <literal>AT TIME ZONE</>
2397         operator.
2398        </para>
2399       </listitem>
2400       <listitem>
2401        <para>
2402         In addition to the timezone names and abbreviations,
2403         <productname>PostgreSQL</productname> will accept POSIX-style time zone
2404         specifications of the form <replaceable>STD</><replaceable>offset</> or
2405         <replaceable>STD</><replaceable>offset</><replaceable>DST</>, where
2406         <replaceable>STD</> is a zone abbreviation, <replaceable>offset</> is a
2407         numeric offset in hours west from UTC, and <replaceable>DST</> is an
2408         optional daylight-savings zone abbreviation, assumed to stand for one
2409         hour ahead of the given offset. For example, if <literal>EST5EDT</>
2410         were not already a recognized zone name, it would be accepted and would
2411         be functionally equivalent to United States East Coast time.  When a
2412         daylight-savings zone name is present, it is assumed to be used
2413         according to the same daylight-savings transition rules used in the
2414         <literal>zoneinfo</> time zone database's <filename>posixrules</> entry.
2415         In a standard <productname>PostgreSQL</productname> installation,
2416         <filename>posixrules</> is the same as <literal>US/Eastern</>, so
2417         that POSIX-style time zone specifications follow USA daylight-savings
2418         rules.  If needed, you can adjust this behavior by replacing the
2419         <filename>posixrules</> file.
2420        </para>
2421       </listitem>
2422      </itemizedlist>
2423
2424      In short, this is the difference between abbreviations
2425      and full names: abbreviations always represent a fixed offset from
2426      UTC, whereas most of the full names imply a local daylight-savings time
2427      rule, and so have two possible UTC offsets.
2428     </para>
2429
2430     <para>
2431      One should be wary that the POSIX-style time zone feature can
2432      lead to silently accepting bogus input, since there is no check on the
2433      reasonableness of the zone abbreviations.  For example, <literal>SET
2434      TIMEZONE TO FOOBAR0</> will work, leaving the system effectively using
2435      a rather peculiar abbreviation for UTC.
2436      Another issue to keep in mind is that in POSIX time zone names,
2437      positive offsets are used for locations <emphasis>west</> of Greenwich.
2438      Everywhere else, <productname>PostgreSQL</productname> follows the
2439      ISO-8601 convention that positive timezone offsets are <emphasis>east</>
2440      of Greenwich.
2441     </para>
2442
2443     <para>
2444      In all cases, timezone names are recognized case-insensitively.
2445      (This is a change from <productname>PostgreSQL</productname> versions
2446      prior to 8.2, which were case-sensitive in some contexts but not others.)
2447     </para>
2448
2449     <para>
2450      Neither full names nor abbreviations are hard-wired into the server;
2451      they are obtained from configuration files stored under
2452      <filename>.../share/timezone/</> and <filename>.../share/timezonesets/</>
2453      of the installation directory
2454      (see <xref linkend="datetime-config-files">).
2455     </para>
2456
2457     <para>
2458      The <xref linkend="guc-timezone"> configuration parameter can
2459      be set in the file <filename>postgresql.conf</>, or in any of the
2460      other standard ways described in <xref linkend="runtime-config">.
2461      There are also some special ways to set it:
2462
2463      <itemizedlist>
2464       <listitem>
2465        <para>
2466         The <acronym>SQL</acronym> command <command>SET TIME ZONE</command>
2467         sets the time zone for the session.  This is an alternative spelling
2468         of <command>SET TIMEZONE TO</> with a more SQL-spec-compatible syntax.
2469        </para>
2470       </listitem>
2471
2472       <listitem>
2473        <para>
2474         The <envar>PGTZ</envar> environment variable is used by
2475         <application>libpq</application> clients
2476         to send a <command>SET TIME ZONE</command>
2477         command to the server upon connection.
2478        </para>
2479       </listitem>
2480      </itemizedlist>
2481     </para>
2482    </sect2>
2483
2484    <sect2 id="datatype-interval-input">
2485     <title>Interval Input</title>
2486
2487     <indexterm>
2488      <primary>interval</primary>
2489     </indexterm>
2490
2491      <para>
2492       <type>interval</type> values can be written using the following
2493       verbose syntax:
2494
2495 <synopsis>
2496 <optional>@</> <replaceable>quantity</> <replaceable>unit</> <optional><replaceable>quantity</> <replaceable>unit</>...</> <optional><replaceable>direction</></optional>
2497 </synopsis>
2498
2499      where <replaceable>quantity</> is a number (possibly signed);
2500      <replaceable>unit</> is <literal>microsecond</literal>,
2501      <literal>millisecond</literal>, <literal>second</literal>,
2502      <literal>minute</literal>, <literal>hour</literal>, <literal>day</literal>,
2503      <literal>week</literal>, <literal>month</literal>, <literal>year</literal>,
2504      <literal>decade</literal>, <literal>century</literal>, <literal>millennium</literal>,
2505      or abbreviations or plurals of these units;
2506      <replaceable>direction</> can be <literal>ago</literal> or
2507      empty.  The at sign (<literal>@</>) is optional noise.  The amounts
2508      of the different units are implicitly added with appropriate
2509      sign accounting.  <literal>ago</literal> negates all the fields.
2510      This syntax is also used for interval output, if
2511      <xref linkend="guc-intervalstyle"> is set to
2512      <literal>postgres_verbose</>.
2513     </para>
2514
2515     <para>
2516      Quantities of days, hours, minutes, and seconds can be specified without
2517      explicit unit markings.  For example, <literal>'1 12:59:10'</> is read
2518      the same as <literal>'1 day 12 hours 59 min 10 sec'</>.  Also,
2519      a combination of years and months can be specified with a dash;
2520      for example <literal>'200-10'</> is read the same as <literal>'200 years
2521      10 months'</>.  (These shorter forms are in fact the only ones allowed
2522      by the <acronym>SQL</acronym> standard, and are used for output when
2523      <varname>IntervalStyle</> is set to <literal>sql_standard</literal>.)
2524     </para>
2525
2526     <para>
2527      Interval values can also be written as ISO 8601 time intervals, using
2528      either the <quote>format with designators</> of the standard's section
2529      4.4.3.2 or the <quote>alternative format</> of section 4.4.3.3.  The
2530      format with designators looks like this:
2531 <synopsis>
2532 P <replaceable>quantity</> <replaceable>unit</> <optional> <replaceable>quantity</> <replaceable>unit</> ...</optional> <optional> T <optional> <replaceable>quantity</> <replaceable>unit</> ...</optional></optional>
2533 </synopsis>
2534       The string must start with a <literal>P</>, and may include a
2535       <literal>T</> that introduces the time-of-day units.  The
2536       available unit abbreviations are given in <xref
2537       linkend="datatype-interval-iso8601-units">.  Units may be
2538       omitted, and may be specified in any order, but units smaller than
2539       a day must appear after <literal>T</>.  In particular, the meaning of
2540       <literal>M</> depends on whether it is before or after
2541       <literal>T</>.
2542      </para>
2543
2544      <table id="datatype-interval-iso8601-units">
2545       <title>ISO 8601 Interval Unit Abbreviations</title>
2546      <tgroup cols="2">
2547        <thead>
2548         <row>
2549          <entry>Abbreviation</entry>
2550          <entry>Meaning</entry>
2551         </row>
2552        </thead>
2553        <tbody>
2554         <row>
2555          <entry>Y</entry>
2556          <entry>Years</entry>
2557         </row>
2558         <row>
2559          <entry>M</entry>
2560          <entry>Months (in the date part)</entry>
2561         </row>
2562         <row>
2563          <entry>W</entry>
2564          <entry>Weeks</entry>
2565         </row>
2566         <row>
2567          <entry>D</entry>
2568          <entry>Days</entry>
2569         </row>
2570         <row>
2571          <entry>H</entry>
2572          <entry>Hours</entry>
2573         </row>
2574         <row>
2575          <entry>M</entry>
2576          <entry>Minutes (in the time part)</entry>
2577         </row>
2578         <row>
2579          <entry>S</entry>
2580          <entry>Seconds</entry>
2581         </row>
2582        </tbody>
2583       </tgroup>
2584      </table>
2585
2586      <para>
2587       In the alternative format:
2588 <synopsis>
2589 P <optional> <replaceable>years</>-<replaceable>months</>-<replaceable>days</> </optional> <optional> T <replaceable>hours</>:<replaceable>minutes</>:<replaceable>seconds</> </optional>
2590 </synopsis>
2591       the string must begin with <literal>P</literal>, and a
2592       <literal>T</> separates the date and time parts of the interval.
2593       The values are given as numbers similar to ISO 8601 dates.
2594     </para>
2595
2596     <para>
2597      When writing an interval constant with a <replaceable>fields</>
2598      specification, or when assigning a string to an interval column that was
2599      defined with a <replaceable>fields</> specification, the interpretation of
2600      unmarked quantities depends on the <replaceable>fields</>.  For
2601      example <literal>INTERVAL '1' YEAR</> is read as 1 year, whereas
2602      <literal>INTERVAL '1'</> means 1 second.  Also, field values
2603      <quote>to the right</> of the least significant field allowed by the
2604      <replaceable>fields</> specification are silently discarded.  For
2605      example, writing <literal>INTERVAL '1 day 2:03:04' HOUR TO MINUTE</>
2606      results in dropping the seconds field, but not the day field.
2607     </para>
2608
2609     <para>
2610      According to the <acronym>SQL</> standard, all fields of an interval
2611      value must have the same sign, so a leading negative sign applies to all
2612      fields; for example the negative sign in the interval literal
2613      <literal>'-1 2:03:04'</> applies to both the days and hour/minute/second
2614      parts.  <productname>PostgreSQL</> allows the fields to have different
2615      signs, and traditionally treats each field in the textual representation
2616      as independently signed, so that the hour/minute/second part is
2617      considered positive in this example.  If <varname>IntervalStyle</> is
2618      set to <literal>sql_standard</literal> then a leading sign is considered
2619      to apply to all fields (but only if no additional signs appear).
2620      Otherwise the traditional <productname>PostgreSQL</> interpretation is
2621      used.  To avoid ambiguity, it's recommended to attach an explicit sign
2622      to each field if any field is negative.
2623     </para>
2624
2625     <para>
2626      Internally <type>interval</> values are stored as months, days,
2627      and seconds. This is done because the number of days in a month
2628      varies, and a day can have 23 or 25 hours if a daylight savings
2629      time adjustment is involved.  The months and days fields are integers
2630      while the seconds field can store fractions.  Because intervals are
2631      usually created from constant strings or <type>timestamp</> subtraction,
2632      this storage method works well in most cases. Functions
2633      <function>justify_days</> and <function>justify_hours</> are
2634      available for adjusting days and hours that overflow their normal
2635      ranges.
2636     </para>
2637
2638     <para>
2639      In the verbose input format, and in some fields of the more compact
2640      input formats, field values can have fractional parts; for example
2641      <literal>'1.5 week'</> or <literal>'01:02:03.45'</>.  Such input is
2642      converted to the appropriate number of months, days, and seconds
2643      for storage.  When this would result in a fractional number of
2644      months or days, the fraction is added to the lower-order fields
2645      using the conversion factors 1 month = 30 days and 1 day = 24 hours.
2646      For example, <literal>'1.5 month'</> becomes 1 month and 15 days.
2647      Only seconds will ever be shown as fractional on output.
2648     </para>
2649
2650     <para>
2651      <xref linkend="datatype-interval-input-examples"> shows some examples
2652      of valid <type>interval</> input.
2653     </para>
2654
2655      <table id="datatype-interval-input-examples">
2656       <title>Interval Input</title>
2657       <tgroup cols="2">
2658        <thead>
2659         <row>
2660          <entry>Example</entry>
2661          <entry>Description</entry>
2662         </row>
2663        </thead>
2664        <tbody>
2665         <row>
2666          <entry>1-2</entry>
2667          <entry>SQL standard format: 1 year 2 months</entry>
2668         </row>
2669         <row>
2670          <entry>3 4:05:06</entry>
2671          <entry>SQL standard format: 3 days 4 hours 5 minutes 6 seconds</entry>
2672         </row>
2673         <row>
2674          <entry>1 year 2 months 3 days 4 hours 5 minutes 6 seconds</entry>
2675          <entry>Traditional Postgres format: 1 year 2 months 3 days 4 hours 5 minutes 6 seconds</entry>
2676         </row>
2677         <row>
2678          <entry>P1Y2M3DT4H5M6S</entry>
2679          <entry>ISO 8601 <quote>format with designators</>: same meaning as above</entry>
2680         </row>
2681         <row>
2682          <entry>P0001-02-03T04:05:06</entry>
2683          <entry>ISO 8601 <quote>alternative format</>: same meaning as above</entry>
2684         </row>
2685        </tbody>
2686       </tgroup>
2687      </table>
2688
2689    </sect2>
2690
2691    <sect2 id="datatype-interval-output">
2692     <title>Interval Output</title>
2693
2694     <indexterm>
2695      <primary>interval</primary>
2696      <secondary>output format</secondary>
2697      <seealso>formatting</seealso>
2698     </indexterm>
2699
2700     <para>
2701      The output format of the interval type can be set to one of the
2702      four styles <literal>sql_standard</>, <literal>postgres</>,
2703      <literal>postgres_verbose</>, or <literal>iso_8601</>,
2704      using the command <literal>SET intervalstyle</literal>.
2705      The default is the <literal>postgres</> format.
2706      <xref linkend="interval-style-output-table"> shows examples of each
2707      output style.
2708     </para>
2709
2710     <para>
2711      The <literal>sql_standard</> style produces output that conforms to
2712      the SQL standard's specification for interval literal strings, if
2713      the interval value meets the standard's restrictions (either year-month
2714      only or day-time only, with no mixing of positive
2715      and negative components).  Otherwise the output looks like a standard
2716      year-month literal string followed by a day-time literal string,
2717      with explicit signs added to disambiguate mixed-sign intervals.
2718     </para>
2719
2720     <para>
2721      The output of the <literal>postgres</> style matches the output of
2722      <productname>PostgreSQL</> releases prior to 8.4 when the
2723      <xref linkend="guc-datestyle"> parameter was set to <literal>ISO</>.
2724     </para>
2725
2726     <para>
2727      The output of the <literal>postgres_verbose</> style matches the output of
2728      <productname>PostgreSQL</> releases prior to 8.4 when the
2729      <varname>DateStyle</> parameter was set to non-<literal>ISO</> output.
2730     </para>
2731
2732     <para>
2733      The output of the <literal>iso_8601</> style matches the <quote>format
2734      with designators</> described in section 4.4.3.2 of the
2735      ISO 8601 standard.
2736     </para>
2737
2738      <table id="interval-style-output-table">
2739        <title>Interval Output Style Examples</title>
2740        <tgroup cols="4">
2741         <thead>
2742          <row>
2743           <entry>Style Specification</entry>
2744           <entry>Year-Month Interval</entry>
2745           <entry>Day-Time Interval</entry>
2746           <entry>Mixed Interval</entry>
2747          </row>
2748         </thead>
2749         <tbody>
2750          <row>
2751           <entry><literal>sql_standard</></entry>
2752           <entry>1-2</entry>
2753           <entry>3 4:05:06</entry>
2754           <entry>-1-2 +3 -4:05:06</entry>
2755          </row>
2756          <row>
2757           <entry><literal>postgres</></entry>
2758           <entry>1 year 2 mons</entry>
2759           <entry>3 days 04:05:06</entry>
2760           <entry>-1 year -2 mons +3 days -04:05:06</entry>
2761          </row>
2762          <row>
2763           <entry><literal>postgres_verbose</></entry>
2764           <entry>@ 1 year 2 mons</entry>
2765           <entry>@ 3 days 4 hours 5 mins 6 secs</entry>
2766           <entry>@ 1 year 2 mons -3 days 4 hours 5 mins 6 secs ago</entry>
2767          </row>
2768          <row>
2769           <entry><literal>iso_8601</></entry>
2770           <entry>P1Y2M</entry>
2771           <entry>P3DT4H5M6S</entry>
2772           <entry>P-1Y-2M3DT-4H-5M-6S</entry>
2773          </row>
2774         </tbody>
2775        </tgroup>
2776     </table>
2777
2778    </sect2>
2779
2780   </sect1>
2781
2782   <sect1 id="datatype-boolean">
2783    <title>Boolean Type</title>
2784
2785    <indexterm zone="datatype-boolean">
2786     <primary>Boolean</primary>
2787     <secondary>data type</secondary>
2788    </indexterm>
2789
2790    <indexterm zone="datatype-boolean">
2791     <primary>true</primary>
2792    </indexterm>
2793
2794    <indexterm zone="datatype-boolean">
2795     <primary>false</primary>
2796    </indexterm>
2797
2798    <para>
2799     <productname>PostgreSQL</productname> provides the
2800     standard <acronym>SQL</acronym> type <type>boolean</type>;
2801     see <xref linkend="datatype-boolean-table">.
2802     The <type>boolean</type> type can have several states:
2803     <quote>true</quote>, <quote>false</quote>, and a third state,
2804     <quote>unknown</quote>, which is represented by the
2805     <acronym>SQL</acronym> null value.
2806    </para>
2807
2808    <table id="datatype-boolean-table">
2809     <title>Boolean Data Type</title>
2810     <tgroup cols="3">
2811      <thead>
2812       <row>
2813        <entry>Name</entry>
2814        <entry>Storage Size</entry>
2815        <entry>Description</entry>
2816       </row>
2817      </thead>
2818      <tbody>
2819       <row>
2820        <entry><type>boolean</type></entry>
2821        <entry>1 byte</entry>
2822        <entry>state of true or false</entry>
2823       </row>
2824      </tbody>
2825     </tgroup>
2826    </table>
2827
2828    <para>
2829     Valid literal values for the <quote>true</quote> state are:
2830     <simplelist>
2831      <member><literal>TRUE</literal></member>
2832      <member><literal>'t'</literal></member>
2833      <member><literal>'true'</literal></member>
2834      <member><literal>'y'</literal></member>
2835      <member><literal>'yes'</literal></member>
2836      <member><literal>'on'</literal></member>
2837      <member><literal>'1'</literal></member>
2838     </simplelist>
2839     For the <quote>false</quote> state, the following values can be
2840     used:
2841     <simplelist>
2842      <member><literal>FALSE</literal></member>
2843      <member><literal>'f'</literal></member>
2844      <member><literal>'false'</literal></member>
2845      <member><literal>'n'</literal></member>
2846      <member><literal>'no'</literal></member>
2847      <member><literal>'off'</literal></member>
2848      <member><literal>'0'</literal></member>
2849     </simplelist>
2850     Leading or trailing whitespace is ignored, and case does not matter.
2851     The key words
2852     <literal>TRUE</literal> and <literal>FALSE</literal> are the preferred
2853     (<acronym>SQL</acronym>-compliant) usage.
2854    </para>
2855
2856    <para>
2857     <xref linkend="datatype-boolean-example"> shows that
2858     <type>boolean</type> values are output using the letters
2859     <literal>t</literal> and <literal>f</literal>.
2860    </para>
2861
2862    <example id="datatype-boolean-example">
2863     <title>Using the <type>boolean</type> Type</title>
2864
2865 <programlisting>
2866 CREATE TABLE test1 (a boolean, b text);
2867 INSERT INTO test1 VALUES (TRUE, 'sic est');
2868 INSERT INTO test1 VALUES (FALSE, 'non est');
2869 SELECT * FROM test1;
2870  a |    b
2871 ---+---------
2872  t | sic est
2873  f | non est
2874
2875 SELECT * FROM test1 WHERE a;
2876  a |    b
2877 ---+---------
2878  t | sic est
2879 </programlisting>
2880    </example>
2881   </sect1>
2882
2883   <sect1 id="datatype-enum">
2884    <title>Enumerated Types</title>
2885
2886    <indexterm zone="datatype-enum">
2887     <primary>data type</primary>
2888     <secondary>enumerated (enum)</secondary>
2889    </indexterm>
2890
2891    <indexterm zone="datatype-enum">
2892     <primary>enumerated types</primary>
2893    </indexterm>
2894
2895    <para>
2896     Enumerated (enum) types are data types that
2897     comprise a static, ordered set of values.
2898     They are equivalent to the <type>enum</type>
2899     types supported in a number of programming languages. An example of an enum
2900     type might be the days of the week, or a set of status values for
2901     a piece of data.
2902    </para>
2903
2904    <sect2>
2905     <title>Declaration of Enumerated Types</title>
2906
2907     <para>
2908      Enum types are created using the <xref
2909      linkend="sql-createtype"> command,
2910      for example:
2911
2912 <programlisting>
2913 CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
2914 </programlisting>
2915
2916      Once created, the enum type can be used in table and function
2917      definitions much like any other type:
2918 <programlisting>
2919 CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
2920 CREATE TABLE person (
2921     name text,
2922     current_mood mood
2923 );
2924 INSERT INTO person VALUES ('Moe', 'happy');
2925 SELECT * FROM person WHERE current_mood = 'happy';
2926  name | current_mood
2927 ------+--------------
2928  Moe  | happy
2929 (1 row)
2930 </programlisting>
2931     </para>
2932     </sect2>
2933
2934     <sect2>
2935      <title>Ordering</title>
2936
2937      <para>
2938       The ordering of the values in an enum type is the
2939       order in which the values were listed when the type was created.
2940       All standard comparison operators and related
2941       aggregate functions are supported for enums.  For example:
2942
2943 <programlisting>
2944 INSERT INTO person VALUES ('Larry', 'sad');
2945 INSERT INTO person VALUES ('Curly', 'ok');
2946 SELECT * FROM person WHERE current_mood > 'sad';
2947  name  | current_mood
2948 -------+--------------
2949  Moe   | happy
2950  Curly | ok
2951 (2 rows)
2952
2953 SELECT * FROM person WHERE current_mood > 'sad' ORDER BY current_mood;
2954  name  | current_mood
2955 -------+--------------
2956  Curly | ok
2957  Moe   | happy
2958 (2 rows)
2959
2960 SELECT name
2961 FROM person
2962 WHERE current_mood = (SELECT MIN(current_mood) FROM person);
2963  name
2964 -------
2965  Larry
2966 (1 row)
2967 </programlisting>
2968      </para>
2969    </sect2>
2970
2971    <sect2>
2972     <title>Type Safety</title>
2973
2974     <para>
2975      Each enumerated data type is separate and cannot
2976      be compared with other enumerated types.  See this example:
2977
2978 <programlisting>
2979 CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic');
2980 CREATE TABLE holidays (
2981     num_weeks integer,
2982     happiness happiness
2983 );
2984 INSERT INTO holidays(num_weeks,happiness) VALUES (4, 'happy');
2985 INSERT INTO holidays(num_weeks,happiness) VALUES (6, 'very happy');
2986 INSERT INTO holidays(num_weeks,happiness) VALUES (8, 'ecstatic');
2987 INSERT INTO holidays(num_weeks,happiness) VALUES (2, 'sad');
2988 ERROR:  invalid input value for enum happiness: "sad"
2989 SELECT person.name, holidays.num_weeks FROM person, holidays
2990   WHERE person.current_mood = holidays.happiness;
2991 ERROR:  operator does not exist: mood = happiness
2992 </programlisting>
2993     </para>
2994
2995     <para>
2996      If you really need to do something like that, you can either
2997      write a custom operator or add explicit casts to your query:
2998
2999 <programlisting>
3000 SELECT person.name, holidays.num_weeks FROM person, holidays
3001   WHERE person.current_mood::text = holidays.happiness::text;
3002  name | num_weeks
3003 ------+-----------
3004  Moe  |         4
3005 (1 row)
3006
3007 </programlisting>
3008     </para>
3009    </sect2>
3010
3011    <sect2>
3012     <title>Implementation Details</title>
3013
3014     <para>
3015      An enum value occupies four bytes on disk.  The length of an enum
3016      value's textual label is limited by the <symbol>NAMEDATALEN</symbol>
3017      setting compiled into <productname>PostgreSQL</productname>; in standard
3018      builds this means at most 63 bytes.
3019     </para>
3020
3021     <para>
3022      Enum labels are case sensitive, so
3023      <type>'happy'</type> is not the same as <type>'HAPPY'</type>.
3024      White space in the labels is significant too.
3025     </para>
3026
3027     <para>
3028      The translations from internal enum values to textual labels are
3029      kept in the system catalog
3030      <link linkend="catalog-pg-enum"><structname>pg_enum</structname></link>.
3031      Querying this catalog directly can be useful.
3032     </para>
3033
3034    </sect2>
3035   </sect1>
3036
3037   <sect1 id="datatype-geometric">
3038    <title>Geometric Types</title>
3039
3040    <para>
3041     Geometric data types represent two-dimensional spatial
3042     objects. <xref linkend="datatype-geo-table"> shows the geometric
3043     types available in <productname>PostgreSQL</productname>.  The
3044     most fundamental type, the point, forms the basis for all of the
3045     other types.
3046    </para>
3047
3048     <table id="datatype-geo-table">
3049      <title>Geometric Types</title>
3050      <tgroup cols="4">
3051       <thead>
3052        <row>
3053         <entry>Name</entry>
3054         <entry>Storage Size</entry>
3055         <entry>Description</entry>
3056         <entry>Representation</entry>
3057        </row>
3058       </thead>
3059       <tbody>
3060        <row>
3061         <entry><type>point</type></entry>
3062         <entry>16 bytes</entry>
3063         <entry>Point on a plane</entry>
3064         <entry>(x,y)</entry>
3065        </row>
3066        <row>
3067         <entry><type>line</type></entry>
3068         <entry>32 bytes</entry>
3069         <entry>Infinite line (not fully implemented)</entry>
3070         <entry>((x1,y1),(x2,y2))</entry>
3071        </row>
3072        <row>
3073         <entry><type>lseg</type></entry>
3074         <entry>32 bytes</entry>
3075         <entry>Finite line segment</entry>
3076         <entry>((x1,y1),(x2,y2))</entry>
3077        </row>
3078        <row>
3079         <entry><type>box</type></entry>
3080         <entry>32 bytes</entry>
3081         <entry>Rectangular box</entry>
3082         <entry>((x1,y1),(x2,y2))</entry>
3083        </row>
3084        <row>
3085         <entry><type>path</type></entry>
3086         <entry>16+16n bytes</entry>
3087         <entry>Closed path (similar to polygon)</entry>
3088         <entry>((x1,y1),...)</entry>
3089        </row>
3090        <row>
3091         <entry><type>path</type></entry>
3092         <entry>16+16n bytes</entry>
3093         <entry>Open path</entry>
3094         <entry>[(x1,y1),...]</entry>
3095        </row>
3096        <row>
3097         <entry><type>polygon</type></entry>
3098         <entry>40+16n bytes</entry>
3099         <entry>Polygon (similar to closed path)</entry>
3100         <entry>((x1,y1),...)</entry>
3101        </row>
3102        <row>
3103         <entry><type>circle</type></entry>
3104         <entry>24 bytes</entry>
3105         <entry>Circle</entry>
3106         <entry>&lt;(x,y),r&gt; (center point and radius)</entry>
3107        </row>
3108       </tbody>
3109      </tgroup>
3110     </table>
3111
3112    <para>
3113     A rich set of functions and operators is available to perform various geometric
3114     operations such as scaling, translation, rotation, and determining
3115     intersections.  They are explained in <xref linkend="functions-geometry">.
3116    </para>
3117
3118    <sect2>
3119     <title>Points</title>
3120
3121     <indexterm>
3122      <primary>point</primary>
3123     </indexterm>
3124
3125     <para>
3126      Points are the fundamental two-dimensional building block for geometric
3127      types.  Values of type <type>point</type> are specified using either of
3128      the following syntaxes:
3129
3130 <synopsis>
3131 ( <replaceable>x</replaceable> , <replaceable>y</replaceable> )
3132   <replaceable>x</replaceable> , <replaceable>y</replaceable>
3133 </synopsis>
3134
3135      where <replaceable>x</> and <replaceable>y</> are the respective
3136      coordinates, as floating-point numbers.
3137     </para>
3138
3139     <para>
3140      Points are output using the first syntax.
3141     </para>
3142    </sect2>
3143
3144    <sect2>
3145     <title>Line Segments</title>
3146
3147     <indexterm>
3148      <primary>lseg</primary>
3149     </indexterm>
3150
3151     <indexterm>
3152      <primary>line segment</primary>
3153     </indexterm>
3154
3155     <para>
3156      Line segments (<type>lseg</type>) are represented by pairs of points.
3157      Values of type <type>lseg</type> are specified using any of the following
3158      syntaxes:
3159
3160 <synopsis>
3161 [ ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) ]
3162 ( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) )
3163   ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> )
3164     <replaceable>x1</replaceable> , <replaceable>y1</replaceable>   ,   <replaceable>x2</replaceable> , <replaceable>y2</replaceable>
3165 </synopsis>
3166
3167      where
3168      <literal>(<replaceable>x1</replaceable>,<replaceable>y1</replaceable>)</literal>
3169      and
3170      <literal>(<replaceable>x2</replaceable>,<replaceable>y2</replaceable>)</literal>
3171      are the end points of the line segment.
3172     </para>
3173
3174     <para>
3175      Line segments are output using the first syntax.
3176     </para>
3177    </sect2>
3178
3179    <sect2>
3180     <title>Boxes</title>
3181
3182     <indexterm>
3183      <primary>box (data type)</primary>
3184     </indexterm>
3185
3186     <indexterm>
3187      <primary>rectangle</primary>
3188     </indexterm>
3189
3190     <para>
3191      Boxes are represented by pairs of points that are opposite
3192      corners of the box.
3193      Values of type <type>box</type> are specified using any of the following
3194      syntaxes:
3195
3196 <synopsis>
3197 ( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) )
3198   ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> )
3199     <replaceable>x1</replaceable> , <replaceable>y1</replaceable>   ,   <replaceable>x2</replaceable> , <replaceable>y2</replaceable>
3200 </synopsis>
3201
3202      where
3203      <literal>(<replaceable>x1</replaceable>,<replaceable>y1</replaceable>)</literal>
3204      and
3205      <literal>(<replaceable>x2</replaceable>,<replaceable>y2</replaceable>)</literal>
3206      are any two opposite corners of the box.
3207     </para>
3208
3209     <para>
3210      Boxes are output using the second syntax.
3211     </para>
3212
3213     <para>
3214      Any two opposite corners can be supplied on input, but the values
3215      will be reordered as needed to store the
3216      upper right and lower left corners, in that order.
3217     </para>
3218    </sect2>
3219
3220    <sect2>
3221     <title>Paths</title>
3222
3223     <indexterm>
3224      <primary>path (data type)</primary>
3225     </indexterm>
3226
3227     <para>
3228      Paths are represented by lists of connected points. Paths can be
3229      <firstterm>open</firstterm>, where
3230      the first and last points in the list are considered not connected, or
3231      <firstterm>closed</firstterm>,
3232      where the first and last points are considered connected.
3233     </para>
3234
3235     <para>
3236      Values of type <type>path</type> are specified using any of the following
3237      syntaxes:
3238
3239 <synopsis>
3240 [ ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> ) ]
3241 ( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> ) )
3242   ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
3243   ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable>   , ... ,   <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
3244     <replaceable>x1</replaceable> , <replaceable>y1</replaceable>   , ... ,   <replaceable>xn</replaceable> , <replaceable>yn</replaceable>
3245 </synopsis>
3246
3247      where the points are the end points of the line segments
3248      comprising the path.  Square brackets (<literal>[]</>) indicate
3249      an open path, while parentheses (<literal>()</>) indicate a
3250      closed path.  When the outermost parentheses are omitted, as
3251      in the third through fifth syntaxes, a closed path is assumed.
3252     </para>
3253
3254     <para>
3255      Paths are output using the first or second syntax, as appropriate.
3256     </para>
3257    </sect2>
3258
3259    <sect2>
3260     <title>Polygons</title>
3261
3262     <indexterm>
3263      <primary>polygon</primary>
3264     </indexterm>
3265
3266     <para>
3267      Polygons are represented by lists of points (the vertexes of the
3268      polygon). Polygons are very similar to closed paths, but are
3269      stored differently and have their own set of support routines.
3270     </para>
3271
3272     <para>
3273      Values of type <type>polygon</type> are specified using any of the
3274      following syntaxes:
3275
3276 <synopsis>
3277 ( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> ) )
3278   ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
3279   ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable>   , ... ,   <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
3280     <replaceable>x1</replaceable> , <replaceable>y1</replaceable>   , ... ,   <replaceable>xn</replaceable> , <replaceable>yn</replaceable>
3281 </synopsis>
3282
3283      where the points are the end points of the line segments
3284      comprising the boundary of the polygon.
3285     </para>
3286
3287     <para>
3288      Polygons are output using the first syntax.
3289     </para>
3290    </sect2>
3291
3292    <sect2>
3293     <title>Circles</title>
3294
3295     <indexterm>
3296      <primary>circle</primary>
3297     </indexterm>
3298
3299     <para>
3300      Circles are represented by a center point and radius.
3301      Values of type <type>circle</type> are specified using any of the
3302      following syntaxes:
3303
3304 <synopsis>
3305 &lt; ( <replaceable>x</replaceable> , <replaceable>y</replaceable> ) , <replaceable>r</replaceable> &gt;
3306 ( ( <replaceable>x</replaceable> , <replaceable>y</replaceable> ) , <replaceable>r</replaceable> )
3307   ( <replaceable>x</replaceable> , <replaceable>y</replaceable> ) , <replaceable>r</replaceable>
3308     <replaceable>x</replaceable> , <replaceable>y</replaceable>   , <replaceable>r</replaceable>
3309 </synopsis>
3310
3311      where
3312      <literal>(<replaceable>x</replaceable>,<replaceable>y</replaceable>)</>
3313      is the center point and <replaceable>r</replaceable> is the radius of the
3314      circle.
3315     </para>
3316
3317     <para>
3318      Circles are output using the first syntax.
3319     </para>
3320    </sect2>
3321
3322   </sect1>
3323
3324   <sect1 id="datatype-net-types">
3325    <title>Network Address Types</title>
3326
3327    <indexterm zone="datatype-net-types">
3328     <primary>network</primary>
3329     <secondary>data types</secondary>
3330    </indexterm>
3331
3332    <para>
3333     <productname>PostgreSQL</> offers data types to store IPv4, IPv6, and MAC
3334     addresses, as shown in <xref linkend="datatype-net-types-table">.  It
3335     is better to use these types instead of plain text types to store
3336     network addresses, because
3337     these types offer input error checking and specialized
3338     operators and functions (see <xref linkend="functions-net">).
3339    </para>
3340
3341     <table tocentry="1" id="datatype-net-types-table">
3342      <title>Network Address Types</title>
3343      <tgroup cols="3">
3344       <thead>
3345        <row>
3346         <entry>Name</entry>
3347         <entry>Storage Size</entry>
3348         <entry>Description</entry>
3349        </row>
3350       </thead>
3351       <tbody>
3352
3353        <row>
3354         <entry><type>cidr</type></entry>
3355         <entry>7 or 19 bytes</entry>
3356         <entry>IPv4 and IPv6 networks</entry>
3357        </row>
3358
3359        <row>
3360         <entry><type>inet</type></entry>
3361         <entry>7 or 19 bytes</entry>
3362         <entry>IPv4 and IPv6 hosts and networks</entry>
3363        </row>
3364
3365        <row>
3366         <entry><type>macaddr</type></entry>
3367         <entry>6 bytes</entry>
3368         <entry>MAC addresses</entry>
3369        </row>
3370
3371       </tbody>
3372      </tgroup>
3373     </table>
3374
3375    <para>
3376     When sorting <type>inet</type> or <type>cidr</type> data types,
3377     IPv4 addresses will always sort before IPv6 addresses, including
3378     IPv4 addresses encapsulated or mapped to IPv6 addresses, such as
3379     ::10.2.3.4 or ::ffff:10.4.3.2.
3380    </para>
3381
3382
3383    <sect2 id="datatype-inet">
3384     <title><type>inet</type></title>
3385
3386     <indexterm>
3387      <primary>inet (data type)</primary>
3388     </indexterm>
3389
3390     <para>
3391      The <type>inet</type> type holds an IPv4 or IPv6 host address, and
3392      optionally its subnet, all in one field.
3393      The subnet is represented by the number of network address bits
3394      present in the host address (the
3395      <quote>netmask</quote>).  If the netmask is 32 and the address is IPv4,
3396      then the value does not indicate a subnet, only a single host.
3397      In IPv6, the address length is 128 bits, so 128 bits specify a
3398      unique host address.  Note that if you
3399      want to accept only networks, you should use the
3400      <type>cidr</type> type rather than <type>inet</type>.
3401     </para>
3402
3403     <para>
3404       The input format for this type is
3405       <replaceable class="parameter">address/y</replaceable>
3406       where
3407       <replaceable class="parameter">address</replaceable>
3408       is an IPv4 or IPv6 address and
3409       <replaceable class="parameter">y</replaceable>
3410       is the number of bits in the netmask.  If the
3411       <replaceable class="parameter">/y</replaceable>
3412       portion is missing, the
3413       netmask is 32 for IPv4 and 128 for IPv6, so the value represents
3414       just a single host.  On display, the
3415       <replaceable class="parameter">/y</replaceable>
3416       portion is suppressed if the netmask specifies a single host.
3417     </para>
3418    </sect2>
3419
3420    <sect2 id="datatype-cidr">
3421     <title><type>cidr</></title>
3422
3423     <indexterm>
3424      <primary>cidr</primary>
3425     </indexterm>
3426
3427     <para>
3428      The <type>cidr</type> type holds an IPv4 or IPv6 network specification.
3429      Input and output formats follow Classless Inter-Domain Routing
3430      conventions.
3431      The format for specifying networks is <replaceable
3432      class="parameter">address/y</> where <replaceable
3433      class="parameter">address</> is the network represented as an
3434      IPv4 or IPv6 address, and <replaceable
3435      class="parameter">y</> is the number of bits in the netmask.  If
3436      <replaceable class="parameter">y</> is omitted, it is calculated
3437      using assumptions from the older classful network numbering system, except
3438      it will be at least large enough to include all of the octets
3439      written in the input.  It is an error to specify a network address
3440      that has bits set to the right of the specified netmask.
3441     </para>
3442
3443     <para>
3444      <xref linkend="datatype-net-cidr-table"> shows some examples.
3445     </para>
3446
3447      <table id="datatype-net-cidr-table">
3448       <title><type>cidr</> Type Input Examples</title>
3449       <tgroup cols="3">
3450        <thead>
3451         <row>
3452          <entry><type>cidr</type> Input</entry>
3453          <entry><type>cidr</type> Output</entry>
3454          <entry><literal><function>abbrev(<type>cidr</type>)</function></literal></entry>
3455         </row>
3456        </thead>
3457        <tbody>
3458         <row>
3459          <entry>192.168.100.128/25</entry>
3460          <entry>192.168.100.128/25</entry>
3461          <entry>192.168.100.128/25</entry>
3462         </row>
3463         <row>
3464          <entry>192.168/24</entry>
3465          <entry>192.168.0.0/24</entry>
3466          <entry>192.168.0/24</entry>
3467         </row>
3468         <row>
3469          <entry>192.168/25</entry>
3470          <entry>192.168.0.0/25</entry>
3471          <entry>192.168.0.0/25</entry>
3472         </row>
3473         <row>
3474          <entry>192.168.1</entry>
3475          <entry>192.168.1.0/24</entry>
3476          <entry>192.168.1/24</entry>
3477         </row>
3478         <row>
3479          <entry>192.168</entry>
3480          <entry>192.168.0.0/24</entry>
3481          <entry>192.168.0/24</entry>
3482         </row>
3483         <row>
3484          <entry>128.1</entry>
3485          <entry>128.1.0.0/16</entry>
3486          <entry>128.1/16</entry>
3487         </row>
3488         <row>
3489          <entry>128</entry>
3490          <entry>128.0.0.0/16</entry>
3491          <entry>128.0/16</entry>
3492         </row>
3493         <row>
3494          <entry>128.1.2</entry>
3495          <entry>128.1.2.0/24</entry>
3496          <entry>128.1.2/24</entry>
3497         </row>
3498         <row>
3499          <entry>10.1.2</entry>
3500          <entry>10.1.2.0/24</entry>
3501          <entry>10.1.2/24</entry>
3502         </row>
3503         <row>
3504          <entry>10.1</entry>
3505          <entry>10.1.0.0/16</entry>
3506          <entry>10.1/16</entry>
3507         </row>
3508         <row>
3509          <entry>10</entry>
3510          <entry>10.0.0.0/8</entry>
3511          <entry>10/8</entry>
3512         </row>
3513         <row>
3514          <entry>10.1.2.3/32</entry>
3515          <entry>10.1.2.3/32</entry>
3516          <entry>10.1.2.3/32</entry>
3517         </row>
3518         <row>
3519          <entry>2001:4f8:3:ba::/64</entry>
3520          <entry>2001:4f8:3:ba::/64</entry>
3521          <entry>2001:4f8:3:ba::/64</entry>
3522         </row>
3523         <row>
3524          <entry>2001:4f8:3:ba:2e0:81ff:fe22:d1f1/128</entry>
3525          <entry>2001:4f8:3:ba:2e0:81ff:fe22:d1f1/128</entry>
3526          <entry>2001:4f8:3:ba:2e0:81ff:fe22:d1f1</entry>
3527         </row>
3528         <row>
3529          <entry>::ffff:1.2.3.0/120</entry>
3530          <entry>::ffff:1.2.3.0/120</entry>
3531          <entry>::ffff:1.2.3/120</entry>
3532         </row>
3533         <row>
3534          <entry>::ffff:1.2.3.0/128</entry>
3535          <entry>::ffff:1.2.3.0/128</entry>
3536          <entry>::ffff:1.2.3.0/128</entry>
3537         </row>
3538        </tbody>
3539       </tgroup>
3540      </table>
3541    </sect2>
3542
3543    <sect2 id="datatype-inet-vs-cidr">
3544     <title><type>inet</type> vs. <type>cidr</type></title>
3545
3546     <para>
3547     The essential difference between <type>inet</type> and <type>cidr</type>
3548     data types is that <type>inet</type> accepts values with nonzero bits to
3549     the right of the netmask, whereas <type>cidr</type> does not.
3550     </para>
3551
3552       <tip>
3553         <para>
3554         If you do not like the output format for <type>inet</type> or
3555         <type>cidr</type> values, try the functions <function>host</>,
3556         <function>text</>, and <function>abbrev</>.
3557         </para>
3558       </tip>
3559    </sect2>
3560
3561    <sect2 id="datatype-macaddr">
3562     <title><type>macaddr</type></title>
3563
3564     <indexterm>
3565      <primary>macaddr (data type)</primary>
3566     </indexterm>
3567
3568     <indexterm>
3569      <primary>MAC address</primary>
3570      <see>macaddr</see>
3571     </indexterm>
3572
3573     <para>
3574      The <type>macaddr</> type stores MAC addresses, known for example
3575      from Ethernet card hardware addresses (although MAC addresses are
3576      used for other purposes as well).  Input is accepted in the
3577      following formats:
3578
3579      <simplelist>
3580       <member><literal>'08:00:2b:01:02:03'</></member>
3581       <member><literal>'08-00-2b-01-02-03'</></member>
3582       <member><literal>'08002b:010203'</></member>
3583       <member><literal>'08002b-010203'</></member>
3584       <member><literal>'0800.2b01.0203'</></member>
3585       <member><literal>'08002b010203'</></member>
3586      </simplelist>
3587
3588      These examples would all specify the same address.  Upper and
3589      lower case are accepted for the digits
3590      <literal>a</> through <literal>f</>.  Output is always in the
3591      first of the forms shown.
3592     </para>
3593
3594     <para>
3595      IEEE Std 802-2001 specifies the second shown form (with hyphens)
3596      as the canonical form for MAC addresses, and specifies the first
3597      form (with colons) as the bit-reversed notation, so that
3598      08-00-2b-01-02-03 = 10:00:D4:80:40:C0.  This convention is widely
3599      ignored nowadays, and it is relevant only for obsolete network
3600      protocols (such as Token Ring).  PostgreSQL makes no provisions
3601      for bit reversal, and all accepted formats use the canonical LSB
3602      order.
3603     </para>
3604
3605     <para>
3606      The remaining four input formats are not part of any standard.
3607     </para>
3608    </sect2>
3609
3610   </sect1>
3611
3612   <sect1 id="datatype-bit">
3613    <title>Bit String Types</title>
3614
3615    <indexterm zone="datatype-bit">
3616     <primary>bit string</primary>
3617     <secondary>data type</secondary>
3618    </indexterm>
3619
3620    <para>
3621     Bit strings are strings of 1's and 0's.  They can be used to store
3622     or visualize bit masks.  There are two SQL bit types:
3623     <type>bit(<replaceable>n</replaceable>)</type> and <type>bit
3624     varying(<replaceable>n</replaceable>)</type>, where
3625     <replaceable>n</replaceable> is a positive integer.
3626    </para>
3627
3628    <para>
3629     <type>bit</type> type data must match the length
3630     <replaceable>n</replaceable> exactly; it is an error to attempt to
3631     store shorter or longer bit strings.  <type>bit varying</type> data is
3632     of variable length up to the maximum length
3633     <replaceable>n</replaceable>; longer strings will be rejected.
3634     Writing <type>bit</type> without a length is equivalent to
3635     <literal>bit(1)</literal>, while <type>bit varying</type> without a length
3636     specification means unlimited length.
3637    </para>
3638
3639    <note>
3640     <para>
3641      If one explicitly casts a bit-string value to
3642      <type>bit(<replaceable>n</>)</type>, it will be truncated or
3643      zero-padded on the right to be exactly <replaceable>n</> bits,
3644      without raising an error.  Similarly,
3645      if one explicitly casts a bit-string value to
3646      <type>bit varying(<replaceable>n</>)</type>, it will be truncated
3647      on the right if it is more than <replaceable>n</> bits.
3648     </para>
3649    </note>
3650
3651    <para>
3652     Refer to <xref
3653     linkend="sql-syntax-bit-strings"> for information about the syntax
3654     of bit string constants.  Bit-logical operators and string
3655     manipulation functions are available; see <xref
3656     linkend="functions-bitstring">.
3657    </para>
3658
3659    <example>
3660     <title>Using the Bit String Types</title>
3661
3662 <programlisting>
3663 CREATE TABLE test (a BIT(3), b BIT VARYING(5));
3664 INSERT INTO test VALUES (B'101', B'00');
3665 INSERT INTO test VALUES (B'10', B'101');
3666 <computeroutput>
3667 ERROR:  bit string length 2 does not match type bit(3)
3668 </computeroutput>
3669 INSERT INTO test VALUES (B'10'::bit(3), B'101');
3670 SELECT * FROM test;
3671 <computeroutput>
3672   a  |  b
3673 -----+-----
3674  101 | 00
3675  100 | 101
3676 </computeroutput>
3677 </programlisting>
3678    </example>
3679
3680    <para>
3681     A bit string value requires 1 byte for each group of 8 bits, plus
3682     5 or 8 bytes overhead depending on the length of the string
3683     (but long values may be compressed or moved out-of-line, as explained
3684     in <xref linkend="datatype-character"> for character strings).
3685    </para>
3686   </sect1>
3687
3688   <sect1 id="datatype-textsearch">
3689    <title>Text Search Types</title>
3690
3691    <indexterm zone="datatype-textsearch">
3692     <primary>full text search</primary>
3693     <secondary>data types</secondary>
3694    </indexterm>
3695
3696    <indexterm zone="datatype-textsearch">
3697     <primary>text search</primary>
3698     <secondary>data types</secondary>
3699    </indexterm>
3700
3701    <para>
3702     <productname>PostgreSQL</productname> provides two data types that
3703     are designed to support full text search, which is the activity of
3704     searching through a collection of natural-language <firstterm>documents</>
3705     to locate those that best match a <firstterm>query</>.
3706     The <type>tsvector</type> type represents a document in a form optimized
3707     for text search; the <type>tsquery</type> type similarly represents
3708     a text query.
3709     <xref linkend="textsearch"> provides a detailed explanation of this
3710     facility, and <xref linkend="functions-textsearch"> summarizes the
3711     related functions and operators.
3712    </para>
3713
3714    <sect2 id="datatype-tsvector">
3715     <title><type>tsvector</type></title>
3716
3717     <indexterm>
3718      <primary>tsvector (data type)</primary>
3719     </indexterm>
3720
3721     <para>
3722      A <type>tsvector</type> value is a sorted list of distinct
3723      <firstterm>lexemes</>, which are words that have been
3724      <firstterm>normalized</> to merge different variants of the same word
3725      (see <xref linkend="textsearch"> for details).  Sorting and
3726      duplicate-elimination are done automatically during input, as shown in
3727      this example:
3728
3729 <programlisting>
3730 SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
3731                       tsvector
3732 ----------------------------------------------------
3733  'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat'
3734 </programlisting>
3735
3736      To represent
3737      lexemes containing whitespace or punctuation, surround them with quotes:
3738
3739 <programlisting>
3740 SELECT $$the lexeme '    ' contains spaces$$::tsvector;
3741                  tsvector
3742 -------------------------------------------
3743  '    ' 'contains' 'lexeme' 'spaces' 'the'
3744 </programlisting>
3745
3746      (We use dollar-quoted string literals in this example and the next one
3747      to avoid the confusion of having to double quote marks within the
3748      literals.)  Embedded quotes and backslashes must be doubled:
3749
3750 <programlisting>
3751 SELECT $$the lexeme 'Joe''s' contains a quote$$::tsvector;
3752                     tsvector
3753 ------------------------------------------------
3754  'Joe''s' 'a' 'contains' 'lexeme' 'quote' 'the'
3755 </programlisting>
3756
3757      Optionally, integer <firstterm>positions</>
3758      can be attached to lexemes:
3759
3760 <programlisting>
3761 SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::tsvector;
3762                                   tsvector
3763 -------------------------------------------------------------------------------
3764  'a':1,6,10 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'on':5 'rat':12 'sat':4
3765 </programlisting>
3766
3767      A position normally indicates the source word's location in the
3768      document.  Positional information can be used for
3769      <firstterm>proximity ranking</firstterm>.  Position values can
3770      range from 1 to 16383; larger numbers are silently set to 16383.
3771      Duplicate positions for the same lexeme are discarded.
3772     </para>
3773
3774     <para>
3775      Lexemes that have positions can further be labeled with a
3776      <firstterm>weight</>, which can be <literal>A</literal>,
3777      <literal>B</literal>, <literal>C</literal>, or <literal>D</literal>.
3778      <literal>D</literal> is the default and hence is not shown on output:
3779
3780 <programlisting>
3781 SELECT 'a:1A fat:2B,4C cat:5D'::tsvector;
3782           tsvector
3783 ----------------------------
3784  'a':1A 'cat':5 'fat':2B,4C
3785 </programlisting>
3786
3787      Weights are typically used to reflect document structure, for example
3788      by marking title words differently from body words.  Text search
3789      ranking functions can assign different priorities to the different
3790      weight markers.
3791     </para>
3792
3793     <para>
3794      It is important to understand that the
3795      <type>tsvector</type> type itself does not perform any normalization;
3796      it assumes the words it is given are normalized appropriately
3797      for the application.  For example,
3798
3799 <programlisting>
3800 SELECT 'The Fat Rats'::tsvector;
3801       tsvector
3802 --------------------
3803  'Fat' 'Rats' 'The'
3804 </programlisting>
3805
3806      For most English-text-searching applications the above words would
3807      be considered non-normalized, but <type>tsvector</type> doesn't care.
3808      Raw document text should usually be passed through
3809      <function>to_tsvector</> to normalize the words appropriately
3810      for searching:
3811
3812 <programlisting>
3813 SELECT to_tsvector('english', 'The Fat Rats');
3814    to_tsvector
3815 -----------------
3816  'fat':2 'rat':3
3817 </programlisting>
3818
3819      Again, see <xref linkend="textsearch"> for more detail.
3820     </para>
3821
3822    </sect2>
3823
3824    <sect2 id="datatype-tsquery">
3825     <title><type>tsquery</type></title>
3826
3827     <indexterm>
3828      <primary>tsquery (data type)</primary>
3829     </indexterm>
3830
3831     <para>
3832      A <type>tsquery</type> value stores lexemes that are to be
3833      searched for, and combines them honoring the Boolean operators
3834      <literal>&amp;</literal> (AND), <literal>|</literal> (OR), and
3835      <literal>!</> (NOT).  Parentheses can be used to enforce grouping
3836      of the operators:
3837
3838 <programlisting>
3839 SELECT 'fat &amp; rat'::tsquery;
3840     tsquery
3841 ---------------
3842  'fat' &amp; 'rat'
3843
3844 SELECT 'fat &amp; (rat | cat)'::tsquery;
3845           tsquery
3846 ---------------------------
3847  'fat' &amp; ( 'rat' | 'cat' )
3848
3849 SELECT 'fat &amp; rat &amp; ! cat'::tsquery;
3850         tsquery
3851 ------------------------
3852  'fat' &amp; 'rat' &amp; !'cat'
3853 </programlisting>
3854
3855      In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
3856      and <literal>&amp;</literal> (AND) binds more tightly than
3857      <literal>|</literal> (OR).
3858     </para>
3859
3860     <para>
3861      Optionally, lexemes in a <type>tsquery</type> can be labeled with
3862      one or more weight letters, which restricts them to match only
3863      <type>tsvector</> lexemes with matching weights:
3864
3865 <programlisting>
3866 SELECT 'fat:ab &amp; cat'::tsquery;
3867     tsquery
3868 ------------------
3869  'fat':AB &amp; 'cat'
3870 </programlisting>
3871     </para>
3872
3873     <para>
3874      Also, lexemes in a <type>tsquery</type> can be labeled with <literal>*</>
3875      to specify prefix matching:
3876 <programlisting>
3877 SELECT 'super:*'::tsquery;
3878   tsquery
3879 -----------
3880  'super':*
3881 </programlisting>
3882      This query will match any word in a <type>tsvector</> that begins
3883      with <quote>super</>.  Note that prefixes are first processed by
3884      text search configurations, which means this comparison returns
3885      true:
3886 <programlisting>
3887 SELECT to_tsvector( 'postgraduate' ) @@ to_tsquery( 'postgres:*' );
3888  ?column?
3889 ----------
3890  t
3891 (1 row)
3892 </programlisting>
3893      because <literal>postgres</> gets stemmed to <literal>postgr</>:
3894 <programlisting>
3895 SELECT to_tsquery('postgres:*');
3896  to_tsquery
3897 ------------
3898  'postgr':*
3899 (1 row)
3900 </programlisting>
3901      which then matches <literal>postgraduate</>.
3902     </para>
3903
3904     <para>
3905      Quoting rules for lexemes are the same as described previously for
3906      lexemes in <type>tsvector</>; and, as with <type>tsvector</>,
3907      any required normalization of words must be done before converting
3908      to the <type>tsquery</> type.  The <function>to_tsquery</>
3909      function is convenient for performing such normalization:
3910
3911 <programlisting>
3912 SELECT to_tsquery('Fat:ab &amp; Cats');
3913     to_tsquery
3914 ------------------
3915  'fat':AB &amp; 'cat'
3916 </programlisting>
3917     </para>
3918
3919    </sect2>
3920
3921   </sect1>
3922
3923   <sect1 id="datatype-uuid">
3924    <title><acronym>UUID</acronym> Type</title>
3925
3926    <indexterm zone="datatype-uuid">
3927     <primary>UUID</primary>
3928    </indexterm>
3929
3930    <para>
3931     The data type <type>uuid</type> stores Universally Unique Identifiers
3932     (UUID) as defined by RFC 4122, ISO/IEC 9834-8:2005, and related standards.
3933     (Some systems refer to this data type as a globally unique identifier, or
3934     GUID,<indexterm><primary>GUID</primary></indexterm> instead.)  This
3935     identifier is a 128-bit quantity that is generated by an algorithm chosen
3936     to make it very unlikely that the same identifier will be generated by
3937     anyone else in the known universe using the same algorithm.  Therefore,
3938     for distributed systems, these identifiers provide a better uniqueness
3939     guarantee than the values produced by sequence generators, which
3940     are unique only within a single database.
3941    </para>
3942
3943    <para>
3944     A UUID is written as a sequence of lower-case hexadecimal digits,
3945     in several groups separated by hyphens, specifically a group of 8
3946     digits followed by three groups of 4 digits followed by a group of
3947     12 digits, for a total of 32 digits representing the 128 bits.  An
3948     example of a UUID in this standard form is:
3949 <programlisting>
3950 a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11
3951 </programlisting>
3952     <productname>PostgreSQL</productname> also accepts the following
3953     alternative forms for input:
3954     use of upper-case digits, the standard format surrounded by
3955     braces, omitting some or all hyphens, and adding a hyphen after any
3956     group of four digits.  Examples are:
3957 <programlisting>
3958 A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11
3959 {a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11}
3960 a0eebc999c0b4ef8bb6d6bb9bd380a11
3961 a0ee-bc99-9c0b-4ef8-bb6d-6bb9-bd38-0a11
3962 {a0eebc99-9c0b4ef8-bb6d6bb9-bd380a11}
3963 </programlisting>
3964     Output is always in the standard form.
3965    </para>
3966
3967    <para>
3968     <productname>PostgreSQL</productname> provides storage and comparison
3969     functions for UUIDs, but the core database does not include any
3970     function for generating UUIDs, because no single algorithm is well
3971     suited for every application.  The <xref
3972     linkend="uuid-ossp"> module
3973     provides functions that implement several standard algorithms.
3974     Alternatively, UUIDs could be generated by client applications or
3975     other libraries invoked through a server-side function.
3976    </para>
3977   </sect1>
3978
3979   <sect1 id="datatype-xml">
3980    <title><acronym>XML</> Type</title>
3981
3982    <indexterm zone="datatype-xml">
3983     <primary>XML</primary>
3984    </indexterm>
3985
3986    <para>
3987     The <type>xml</type> data type can be used to store XML data.  Its
3988     advantage over storing XML data in a <type>text</type> field is that it
3989     checks the input values for well-formedness, and there are support
3990     functions to perform type-safe operations on it; see <xref
3991     linkend="functions-xml">.  Use of this data type requires the
3992     installation to have been built with <command>configure
3993     --with-libxml</>.
3994    </para>
3995
3996    <para>
3997     The <type>xml</type> type can store well-formed
3998     <quote>documents</quote>, as defined by the XML standard, as well
3999     as <quote>content</quote> fragments, which are defined by the
4000     production <literal>XMLDecl? content</literal> in the XML
4001     standard.  Roughly, this means that content fragments can have
4002     more than one top-level element or character node.  The expression
4003     <literal><replaceable>xmlvalue</replaceable> IS DOCUMENT</literal>
4004     can be used to evaluate whether a particular <type>xml</type>
4005     value is a full document or only a content fragment.
4006    </para>
4007
4008    <sect2>
4009     <title>Creating XML Values</title>
4010    <para>
4011     To produce a value of type <type>xml</type> from character data,
4012     use the function
4013     <function>xmlparse</function>:<indexterm><primary>xmlparse</primary></indexterm>
4014 <synopsis>
4015 XMLPARSE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable>)
4016 </synopsis>
4017     Examples:
4018 <programlisting><![CDATA[
4019 XMLPARSE (DOCUMENT '<?xml version="1.0"?><book><title>Manual</title><chapter>...</chapter></book>')
4020 XMLPARSE (CONTENT 'abc<foo>bar</foo><bar>foo</bar>')
4021 ]]></programlisting>
4022     While this is the only way to convert character strings into XML
4023     values according to the SQL standard, the PostgreSQL-specific
4024     syntaxes:
4025 <programlisting><![CDATA[
4026 xml '<foo>bar</foo>'
4027 '<foo>bar</foo>'::xml
4028 ]]></programlisting>
4029     can also be used.
4030    </para>
4031
4032    <para>
4033     The <type>xml</type> type does not validate input values
4034     against a document type declaration
4035     (DTD),<indexterm><primary>DTD</primary></indexterm>
4036     even when the input value specifies a DTD.
4037     There is also currently no built-in support for validating against
4038     other XML schema languages such as XML Schema.
4039    </para>
4040
4041    <para>
4042     The inverse operation, producing a character string value from
4043     <type>xml</type>, uses the function
4044     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
4045 <synopsis>
4046 XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> )
4047 </synopsis>
4048     <replaceable>type</replaceable> can be
4049     <type>character</type>, <type>character varying</type>, or
4050     <type>text</type> (or an alias for one of those).  Again, according
4051     to the SQL standard, this is the only way to convert between type
4052     <type>xml</type> and character types, but PostgreSQL also allows
4053     you to simply cast the value.
4054    </para>
4055
4056    <para>
4057     When a character string value is cast to or from type
4058     <type>xml</type> without going through <function>XMLPARSE</function> or
4059     <function>XMLSERIALIZE</function>, respectively, the choice of
4060     <literal>DOCUMENT</literal> versus <literal>CONTENT</literal> is
4061     determined by the <quote>XML option</quote>
4062     <indexterm><primary>XML option</primary></indexterm>
4063     session configuration parameter, which can be set using the
4064     standard command:
4065 <synopsis>
4066 SET XML OPTION { DOCUMENT | CONTENT };
4067 </synopsis>
4068     or the more PostgreSQL-like syntax
4069 <synopsis>
4070 SET xmloption TO { DOCUMENT | CONTENT };
4071 </synopsis>
4072     The default is <literal>CONTENT</literal>, so all forms of XML
4073     data are allowed.
4074    </para>
4075
4076    <note>
4077     <para>
4078      With the default XML option setting, you cannot directly cast
4079      character strings to type <type>xml</type> if they contain a
4080      document type declaration, because the definition of an XML content
4081      fragment does not accept them.  If you need to do that, either
4082      use <literal>XMLPARSE</literal> or change the XML option.
4083     </para>
4084    </note>
4085
4086    </sect2>
4087
4088    <sect2>
4089     <title>Encoding Handling</title>
4090    <para>
4091     Care must be taken when dealing with multiple character encodings
4092     on the client, server, and in the XML data passed through them.
4093     When using the text mode to pass queries to the server and query
4094     results to the client (which is the normal mode), PostgreSQL
4095     converts all character data passed between the client and the
4096     server, in either direction, to the character encoding of the
4097     receiving end; see <xref linkend="multibyte">.  This includes string
4098     representations of XML values, such as in the above examples.
4099     This would ordinarily mean that encoding declarations contained in
4100     XML data can become invalid as the character data is converted
4101     to other encodings while traveling between client and server,
4102     because the embedded encoding declaration is not changed.  To cope
4103     with this behavior, encoding declarations contained in
4104     character strings presented for input to the <type>xml</type> type
4105     are <emphasis>ignored</emphasis>, and content is assumed
4106     to be in the current server encoding.  Consequently, for correct
4107     processing, character strings of XML data must be sent
4108     from the client in the current client encoding.  It is the
4109     responsibility of the client to either convert documents to the
4110     current client encoding before sending them to the server, or to
4111     adjust the client encoding appropriately.  On output, values of
4112     type <type>xml</type> will not have an encoding declaration, and
4113     clients should assume all data is in the current client
4114     encoding.
4115    </para>
4116
4117    <para>
4118     When using binary mode to pass query parameters to the server
4119     and query results back to the client, no character set conversion
4120     is performed, so the situation is different.  In this case, an
4121     encoding declaration in the XML data will be observed, and if it
4122     is absent, the data will be assumed to be in UTF-8 (as required by
4123     the XML standard; note that PostgreSQL does not support UTF-16).
4124     On output, data will have an encoding declaration
4125     specifying the client encoding, unless the client encoding is
4126     UTF-8, in which case it will be omitted.
4127    </para>
4128
4129    <para>
4130     Needless to say, processing XML data with PostgreSQL will be less
4131     error-prone and more efficient if the XML data encoding, client encoding,
4132     and server encoding are the same.  Since XML data is internally
4133     processed in UTF-8, computations will be most efficient if the
4134     server encoding is also UTF-8.
4135    </para>
4136
4137    <caution>
4138     <para>
4139      Some XML-related functions may not work at all on non-ASCII data
4140      when the server encoding is not UTF-8.  This is known to be an
4141      issue for <function>xpath()</> in particular.
4142     </para>
4143    </caution>
4144    </sect2>
4145
4146    <sect2>
4147    <title>Accessing XML Values</title>
4148
4149    <para>
4150     The <type>xml</type> data type is unusual in that it does not
4151     provide any comparison operators.  This is because there is no
4152     well-defined and universally useful comparison algorithm for XML
4153     data.  One consequence of this is that you cannot retrieve rows by
4154     comparing an <type>xml</type> column against a search value.  XML
4155     values should therefore typically be accompanied by a separate key
4156     field such as an ID.  An alternative solution for comparing XML
4157     values is to convert them to character strings first, but note
4158     that character string comparison has little to do with a useful
4159     XML comparison method.
4160    </para>
4161
4162    <para>
4163     Since there are no comparison operators for the <type>xml</type>
4164     data type, it is not possible to create an index directly on a
4165     column of this type.  If speedy searches in XML data are desired,
4166     possible workarounds include casting the expression to a
4167     character string type and indexing that, or indexing an XPath
4168     expression.  Of course, the actual query would have to be adjusted
4169     to search by the indexed expression.
4170    </para>
4171
4172    <para>
4173     The text-search functionality in PostgreSQL can also be used to speed
4174     up full-document searches of XML data.  The necessary
4175     preprocessing support is, however, not yet available in the PostgreSQL
4176     distribution.
4177    </para>
4178    </sect2>
4179   </sect1>
4180
4181   <sect1 id="datatype-json">
4182    <title><acronym>JSON</> Type</title>
4183
4184    <indexterm zone="datatype-json">
4185     <primary>JSON</primary>
4186    </indexterm>
4187
4188    <para>
4189     The <type>json</type> data type can be used to store JSON (JavaScript
4190     Object Notation) data, as specified in <ulink
4191     url="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</ulink>.  Such
4192     data can also be stored as <type>text</type>, but the
4193     <type>json</type> data type has the advantage of checking that each
4194     stored value is a valid JSON value.  There are also related support
4195     functions available; see <xref linkend="functions-json">.
4196    </para>
4197
4198    <para>
4199     <productname>PostgreSQL</productname> allows only one server encoding
4200     per database.  It is therefore not possible for JSON to conform rigidly
4201     to the specification unless the server encoding is UTF-8.  Attempts to
4202     directly include characters which cannot be represented in the server
4203     encoding will fail; conversely, characters which can be represented in
4204     the server encoding but not in UTF-8 will be allowed.
4205     <literal>\uXXXX</literal> escapes are allowed regardless of the server
4206     encoding, and are checked only for syntactic correctness.
4207    </para>
4208   </sect1>
4209
4210   &array;
4211
4212   &rowtypes;
4213
4214   &rangetypes;
4215
4216   <sect1 id="datatype-oid">
4217    <title>Object Identifier Types</title>
4218
4219    <indexterm zone="datatype-oid">
4220     <primary>object identifier</primary>
4221     <secondary>data type</secondary>
4222    </indexterm>
4223
4224    <indexterm zone="datatype-oid">
4225     <primary>oid</primary>
4226    </indexterm>
4227
4228    <indexterm zone="datatype-oid">
4229     <primary>regproc</primary>
4230    </indexterm>
4231
4232    <indexterm zone="datatype-oid">
4233     <primary>regprocedure</primary>
4234    </indexterm>
4235
4236    <indexterm zone="datatype-oid">
4237     <primary>regoper</primary>
4238    </indexterm>
4239
4240    <indexterm zone="datatype-oid">
4241     <primary>regoperator</primary>
4242    </indexterm>
4243
4244    <indexterm zone="datatype-oid">
4245     <primary>regclass</primary>
4246    </indexterm>
4247
4248    <indexterm zone="datatype-oid">
4249     <primary>regtype</primary>
4250    </indexterm>
4251
4252    <indexterm zone="datatype-oid">
4253     <primary>regconfig</primary>
4254    </indexterm>
4255
4256    <indexterm zone="datatype-oid">
4257     <primary>regdictionary</primary>
4258    </indexterm>
4259
4260    <indexterm zone="datatype-oid">
4261     <primary>xid</primary>
4262    </indexterm>
4263
4264    <indexterm zone="datatype-oid">
4265     <primary>cid</primary>
4266    </indexterm>
4267
4268    <indexterm zone="datatype-oid">
4269     <primary>tid</primary>
4270    </indexterm>
4271
4272    <para>
4273     Object identifiers (OIDs) are used internally by
4274     <productname>PostgreSQL</productname> as primary keys for various
4275     system tables.  OIDs are not added to user-created tables, unless
4276     <literal>WITH OIDS</literal> is specified when the table is
4277     created, or the <xref linkend="guc-default-with-oids">
4278     configuration variable is enabled.  Type <type>oid</> represents
4279     an object identifier.  There are also several alias types for
4280     <type>oid</>: <type>regproc</>, <type>regprocedure</>,
4281     <type>regoper</>, <type>regoperator</>, <type>regclass</>,
4282     <type>regtype</>, <type>regconfig</>, and <type>regdictionary</>.
4283     <xref linkend="datatype-oid-table"> shows an overview.
4284    </para>
4285
4286    <para>
4287     The <type>oid</> type is currently implemented as an unsigned
4288     four-byte integer.  Therefore, it is not large enough to provide
4289     database-wide uniqueness in large databases, or even in large
4290     individual tables.  So, using a user-created table's OID column as
4291     a primary key is discouraged.  OIDs are best used only for
4292     references to system tables.
4293    </para>
4294
4295    <para>
4296     The <type>oid</> type itself has few operations beyond comparison.
4297     It can be cast to integer, however, and then manipulated using the
4298     standard integer operators.  (Beware of possible
4299     signed-versus-unsigned confusion if you do this.)
4300    </para>
4301
4302    <para>
4303     The OID alias types have no operations of their own except
4304     for specialized input and output routines.  These routines are able
4305     to accept and display symbolic names for system objects, rather than
4306     the raw numeric value that type <type>oid</> would use.  The alias
4307     types allow simplified lookup of OID values for objects.  For example,
4308     to examine the <structname>pg_attribute</> rows related to a table
4309     <literal>mytable</>, one could write:
4310 <programlisting>
4311 SELECT * FROM pg_attribute WHERE attrelid = 'mytable'::regclass;
4312 </programlisting>
4313     rather than:
4314 <programlisting>
4315 SELECT * FROM pg_attribute
4316   WHERE attrelid = (SELECT oid FROM pg_class WHERE relname = 'mytable');
4317 </programlisting>
4318     While that doesn't look all that bad by itself, it's still oversimplified.
4319     A far more complicated sub-select would be needed to
4320     select the right OID if there are multiple tables named
4321     <literal>mytable</> in different schemas.
4322     The <type>regclass</> input converter handles the table lookup according
4323     to the schema path setting, and so it does the <quote>right thing</>
4324     automatically.  Similarly, casting a table's OID to
4325     <type>regclass</> is handy for symbolic display of a numeric OID.
4326    </para>
4327
4328     <table id="datatype-oid-table">
4329      <title>Object Identifier Types</title>
4330      <tgroup cols="4">
4331       <thead>
4332        <row>
4333         <entry>Name</entry>
4334         <entry>References</entry>
4335         <entry>Description</entry>
4336         <entry>Value Example</entry>
4337        </row>
4338       </thead>
4339
4340       <tbody>
4341
4342        <row>
4343         <entry><type>oid</></entry>
4344         <entry>any</entry>
4345         <entry>numeric object identifier</entry>
4346         <entry><literal>564182</></entry>
4347        </row>
4348
4349        <row>
4350         <entry><type>regproc</></entry>
4351         <entry><structname>pg_proc</></entry>
4352         <entry>function name</entry>
4353         <entry><literal>sum</></entry>
4354        </row>
4355
4356        <row>
4357         <entry><type>regprocedure</></entry>
4358         <entry><structname>pg_proc</></entry>
4359         <entry>function with argument types</entry>
4360         <entry><literal>sum(int4)</></entry>
4361        </row>
4362
4363        <row>
4364         <entry><type>regoper</></entry>
4365         <entry><structname>pg_operator</></entry>
4366         <entry>operator name</entry>
4367         <entry><literal>+</></entry>
4368        </row>
4369
4370        <row>
4371         <entry><type>regoperator</></entry>
4372         <entry><structname>pg_operator</></entry>
4373         <entry>operator with argument types</entry>
4374         <entry><literal>*(integer,integer)</> or <literal>-(NONE,integer)</></entry>
4375        </row>
4376
4377        <row>
4378         <entry><type>regclass</></entry>
4379         <entry><structname>pg_class</></entry>
4380         <entry>relation name</entry>
4381         <entry><literal>pg_type</></entry>
4382        </row>
4383
4384        <row>
4385         <entry><type>regtype</></entry>
4386         <entry><structname>pg_type</></entry>
4387         <entry>data type name</entry>
4388         <entry><literal>integer</></entry>
4389        </row>
4390
4391        <row>
4392         <entry><type>regconfig</></entry>
4393         <entry><structname>pg_ts_config</></entry>
4394         <entry>text search configuration</entry>
4395         <entry><literal>english</></entry>
4396        </row>
4397
4398        <row>
4399         <entry><type>regdictionary</></entry>
4400         <entry><structname>pg_ts_dict</></entry>
4401         <entry>text search dictionary</entry>
4402         <entry><literal>simple</></entry>
4403        </row>
4404       </tbody>
4405      </tgroup>
4406     </table>
4407
4408    <para>
4409     All of the OID alias types accept schema-qualified names, and will
4410     display schema-qualified names on output if the object would not
4411     be found in the current search path without being qualified.
4412     The <type>regproc</> and <type>regoper</> alias types will only
4413     accept input names that are unique (not overloaded), so they are
4414     of limited use; for most uses <type>regprocedure</> or
4415     <type>regoperator</> are more appropriate.  For <type>regoperator</>,
4416     unary operators are identified by writing <literal>NONE</> for the unused
4417     operand.
4418    </para>
4419
4420    <para>
4421     An additional property of the OID alias types is the creation of
4422     dependencies.  If a
4423     constant of one of these types appears in a stored expression
4424     (such as a column default expression or view), it creates a dependency
4425     on the referenced object.  For example, if a column has a default
4426     expression <literal>nextval('my_seq'::regclass)</>,
4427     <productname>PostgreSQL</productname>
4428     understands that the default expression depends on the sequence
4429     <literal>my_seq</>; the system will not let the sequence be dropped
4430     without first removing the default expression.
4431    </para>
4432
4433    <para>
4434     Another identifier type used by the system is <type>xid</>, or transaction
4435     (abbreviated <abbrev>xact</>) identifier.  This is the data type of the system columns
4436     <structfield>xmin</> and <structfield>xmax</>.  Transaction identifiers are 32-bit quantities.
4437    </para>
4438
4439    <para>
4440     A third identifier type used by the system is <type>cid</>, or
4441     command identifier.  This is the data type of the system columns
4442     <structfield>cmin</> and <structfield>cmax</>. Command identifiers are also 32-bit quantities.
4443    </para>
4444
4445    <para>
4446     A final identifier type used by the system is <type>tid</>, or tuple
4447     identifier (row identifier).  This is the data type of the system column
4448     <structfield>ctid</>.  A tuple ID is a pair
4449     (block number, tuple index within block) that identifies the
4450     physical location of the row within its table.
4451    </para>
4452
4453    <para>
4454     (The system columns are further explained in <xref
4455     linkend="ddl-system-columns">.)
4456    </para>
4457   </sect1>
4458
4459   <sect1 id="datatype-pseudo">
4460    <title>Pseudo-Types</title>
4461
4462    <indexterm zone="datatype-pseudo">
4463     <primary>record</primary>
4464    </indexterm>
4465
4466    <indexterm zone="datatype-pseudo">
4467     <primary>any</primary>
4468    </indexterm>
4469
4470    <indexterm zone="datatype-pseudo">
4471     <primary>anyelement</primary>
4472    </indexterm>
4473
4474    <indexterm zone="datatype-pseudo">
4475     <primary>anyarray</primary>
4476    </indexterm>
4477
4478    <indexterm zone="datatype-pseudo">
4479     <primary>anynonarray</primary>
4480    </indexterm>
4481
4482    <indexterm zone="datatype-pseudo">
4483     <primary>anyenum</primary>
4484    </indexterm>
4485
4486    <indexterm zone="datatype-pseudo">
4487     <primary>anyrange</primary>
4488    </indexterm>
4489
4490    <indexterm zone="datatype-pseudo">
4491     <primary>void</primary>
4492    </indexterm>
4493
4494    <indexterm zone="datatype-pseudo">
4495     <primary>trigger</primary>
4496    </indexterm>
4497
4498    <indexterm zone="datatype-pseudo">
4499     <primary>language_handler</primary>
4500    </indexterm>
4501
4502    <indexterm zone="datatype-pseudo">
4503     <primary>fdw_handler</primary>
4504    </indexterm>
4505
4506    <indexterm zone="datatype-pseudo">
4507     <primary>cstring</primary>
4508    </indexterm>
4509
4510    <indexterm zone="datatype-pseudo">
4511     <primary>internal</primary>
4512    </indexterm>
4513
4514    <indexterm zone="datatype-pseudo">
4515     <primary>opaque</primary>
4516    </indexterm>
4517
4518    <para>
4519     The <productname>PostgreSQL</productname> type system contains a
4520     number of special-purpose entries that are collectively called
4521     <firstterm>pseudo-types</>.  A pseudo-type cannot be used as a
4522     column data type, but it can be used to declare a function's
4523     argument or result type.  Each of the available pseudo-types is
4524     useful in situations where a function's behavior does not
4525     correspond to simply taking or returning a value of a specific
4526     <acronym>SQL</acronym> data type.  <xref
4527     linkend="datatype-pseudotypes-table"> lists the existing
4528     pseudo-types.
4529    </para>
4530
4531     <table id="datatype-pseudotypes-table">
4532      <title>Pseudo-Types</title>
4533      <tgroup cols="2">
4534       <thead>
4535        <row>
4536         <entry>Name</entry>
4537         <entry>Description</entry>
4538        </row>
4539       </thead>
4540
4541       <tbody>
4542        <row>
4543         <entry><type>any</></entry>
4544         <entry>Indicates that a function accepts any input data type.</entry>
4545        </row>
4546
4547        <row>
4548         <entry><type>anyelement</></entry>
4549         <entry>Indicates that a function accepts any data type
4550         (see <xref linkend="extend-types-polymorphic">).</entry>
4551        </row>
4552
4553        <row>
4554         <entry><type>anyarray</></entry>
4555         <entry>Indicates that a function accepts any array data type
4556         (see <xref linkend="extend-types-polymorphic">).</entry>
4557        </row>
4558
4559        <row>
4560         <entry><type>anynonarray</></entry>
4561         <entry>Indicates that a function accepts any non-array data type
4562         (see <xref linkend="extend-types-polymorphic">).</entry>
4563        </row>
4564
4565        <row>
4566         <entry><type>anyenum</></entry>
4567         <entry>Indicates that a function accepts any enum data type
4568         (see <xref linkend="extend-types-polymorphic"> and
4569         <xref linkend="datatype-enum">).</entry>
4570        </row>
4571
4572        <row>
4573         <entry><type>anyrange</></entry>
4574         <entry>Indicates that a function accepts any range data type
4575         (see <xref linkend="extend-types-polymorphic"> and
4576         <xref linkend="rangetypes">).</entry>
4577        </row>
4578
4579        <row>
4580         <entry><type>cstring</></entry>
4581         <entry>Indicates that a function accepts or returns a null-terminated C string.</entry>
4582        </row>
4583
4584        <row>
4585         <entry><type>internal</></entry>
4586         <entry>Indicates that a function accepts or returns a server-internal
4587         data type.</entry>
4588        </row>
4589
4590        <row>
4591         <entry><type>language_handler</></entry>
4592         <entry>A procedural language call handler is declared to return <type>language_handler</>.</entry>
4593        </row>
4594
4595        <row>
4596         <entry><type>fdw_handler</></entry>
4597         <entry>A foreign-data wrapper handler is declared to return <type>fdw_handler</>.</entry>
4598        </row>
4599
4600        <row>
4601         <entry><type>record</></entry>
4602         <entry>Identifies a function returning an unspecified row type.</entry>
4603        </row>
4604
4605        <row>
4606         <entry><type>trigger</></entry>
4607         <entry>A trigger function is declared to return <type>trigger</>.</entry>
4608        </row>
4609
4610        <row>
4611         <entry><type>void</></entry>
4612         <entry>Indicates that a function returns no value.</entry>
4613        </row>
4614
4615        <row>
4616         <entry><type>opaque</></entry>
4617         <entry>An obsolete type name that formerly served all the above purposes.</entry>
4618        </row>
4619       </tbody>
4620      </tgroup>
4621     </table>
4622
4623    <para>
4624     Functions coded in C (whether built-in or dynamically loaded) can be
4625     declared to accept or return any of these pseudo data types.  It is up to
4626     the function author to ensure that the function will behave safely
4627     when a pseudo-type is used as an argument type.
4628    </para>
4629
4630    <para>
4631     Functions coded in procedural languages can use pseudo-types only as
4632     allowed by their implementation languages.  At present the procedural
4633     languages all forbid use of a pseudo-type as argument type, and allow
4634     only <type>void</> and <type>record</> as a result type (plus
4635     <type>trigger</> when the function is used as a trigger).  Some also
4636     support polymorphic functions using the types <type>anyelement</>,
4637     <type>anyarray</>, <type>anynonarray</>, <type>anyenum</>, and
4638     <type>anyrange</>.
4639    </para>
4640
4641    <para>
4642     The <type>internal</> pseudo-type is used to declare functions
4643     that are meant only to be called internally by the database
4644     system, and not by direct invocation in an <acronym>SQL</acronym>
4645     query.  If a function has at least one <type>internal</>-type
4646     argument then it cannot be called from <acronym>SQL</acronym>.  To
4647     preserve the type safety of this restriction it is important to
4648     follow this coding rule: do not create any function that is
4649     declared to return <type>internal</> unless it has at least one
4650     <type>internal</> argument.
4651    </para>
4652
4653   </sect1>
4654
4655  </chapter>