granicus.if.org Git - postgresql/blob - doc/src/sgml/xfunc.sgml

   1 <!-- $PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.130 2007/11/10 20:14:36 tgl Exp $ -->
   2
   3  <sect1 id="xfunc">
   4   <title>User-Defined Functions</title>
   5
   6   <indexterm zone="xfunc">
   7    <primary>function</primary>
   8    <secondary>user-defined</secondary>
   9   </indexterm>
  10
  11   <para>
  12    <productname>PostgreSQL</productname> provides four kinds of
  13    functions:
  14
  15    <itemizedlist>
  16     <listitem>
  17      <para>
  18       query language functions (functions written in
  19       <acronym>SQL</acronym>) (<xref linkend="xfunc-sql">)
  20      </para>
  21     </listitem>
  22     <listitem>
  23      <para>
  24       procedural language functions (functions written in, for
  25       example, <application>PL/pgSQL</> or <application>PL/Tcl</>)
  26       (<xref linkend="xfunc-pl">)
  27      </para>
  28     </listitem>
  29     <listitem>
  30      <para>
  31       internal functions (<xref linkend="xfunc-internal">)
  32      </para>
  33     </listitem>
  34     <listitem>
  35      <para>
  36       C-language functions (<xref linkend="xfunc-c">)
  37      </para>
  38     </listitem>
  39    </itemizedlist>
  40   </para>
  41
  42   <para>
  43    Every kind
  44    of  function  can take base types, composite types, or
  45    combinations of these as arguments (parameters). In addition,
  46    every kind of function can return a base type or
  47    a composite type.  Functions can also be defined to return
  48    sets of base or composite values.
  49   </para>
  50
  51   <para>
  52    Many kinds of functions can take or return certain pseudo-types
  53    (such as polymorphic types), but the available facilities vary.
  54    Consult the description of each kind of function for more details.
  55   </para>
  56
  57   <para>
  58    It's easiest to define <acronym>SQL</acronym>
  59    functions, so we'll start by discussing those.
  60    Most of the concepts presented for <acronym>SQL</acronym> functions
  61    will carry over to the other types of functions.
  62   </para>
  63
  64   <para>
  65    Throughout this chapter, it can be useful to look at the reference
  66    page of the <xref linkend="sql-createfunction"
  67    endterm="sql-createfunction-title"> command to
  68    understand the examples better.  Some examples from this chapter
  69    can be found in <filename>funcs.sql</filename> and
  70    <filename>funcs.c</filename> in the <filename>src/tutorial</>
  71    directory in the <productname>PostgreSQL</productname> source
  72    distribution.
  73   </para>
  74   </sect1>
  75
  76   <sect1 id="xfunc-sql">
  77    <title>Query Language (<acronym>SQL</acronym>) Functions</title>
  78
  79    <indexterm zone="xfunc-sql">
  80     <primary>function</primary>
  81     <secondary>user-defined</secondary>
  82     <tertiary>in SQL</tertiary>
  83    </indexterm>
  84
  85    <para>
  86     SQL functions execute an arbitrary list of SQL statements, returning
  87     the result of the last query in the list.
  88     In the simple (non-set)
  89     case, the first row of the last query's result will be returned.
  90     (Bear in mind that <quote>the first row</quote> of a multirow
  91     result is not well-defined unless you use <literal>ORDER BY</>.)
  92     If the last query happens
  93     to return no rows at all, the null value will be returned.
  94    </para>
  95
  96    <para>
  97     <indexterm><primary>SETOF</><seealso>function</></> Alternatively,
  98     an SQL function can be declared to return a set, by specifying the
  99     function's return type as <literal>SETOF
 100     <replaceable>sometype</></literal>.  In this case all rows of the
 101     last query's result are returned.  Further details appear below.
 102    </para>
 103
 104    <para>
 105     The body of an SQL function must be a list of SQL
 106     statements separated by semicolons.  A semicolon after the last
 107     statement is optional.  Unless the function is declared to return
 108     <type>void</>, the last statement must be a <command>SELECT</>.
 109    </para>
 110
 111     <para>
 112      Any collection of commands in the  <acronym>SQL</acronym>
 113      language can be packaged together and defined as a function.
 114      Besides <command>SELECT</command> queries, the commands can include data
 115      modification queries (<command>INSERT</command>,
 116      <command>UPDATE</command>, and <command>DELETE</command>), as well as
 117      other SQL commands. (The only exception is that you cannot put
 118      <command>BEGIN</>, <command>COMMIT</>, <command>ROLLBACK</>, or
 119      <command>SAVEPOINT</> commands into a <acronym>SQL</acronym> function.)
 120      However, the final command
 121      must be a <command>SELECT</command> that returns whatever is
 122      specified as the function's return type.  Alternatively, if you
 123      want to define a SQL function that performs actions but has no
 124      useful value to return, you can define it as returning <type>void</>.
 125      In that case, the function body must not end with a <command>SELECT</command>.
 126      For example, this function removes rows with negative salaries from
 127      the <literal>emp</> table:
 128
 129 <screen>
 130 CREATE FUNCTION clean_emp() RETURNS void AS '
 131     DELETE FROM emp
 132         WHERE salary &lt; 0;
 133 ' LANGUAGE SQL;
 134
 135 SELECT clean_emp();
 136
 137  clean_emp
 138 -----------
 139
 140 (1 row)
 141 </screen>
 142     </para>
 143
 144    <para>
 145     The syntax of the <command>CREATE FUNCTION</command> command requires
 146     the function body to be written as a string constant.  It is usually
 147     most convenient to use dollar quoting (see <xref
 148     linkend="sql-syntax-dollar-quoting">) for the string constant.
 149     If you choose to use regular single-quoted string constant syntax,
 150     you must double single quote marks (<literal>'</>) and backslashes
 151     (<literal>\</>) (assuming escape string syntax) in the body of
 152     the function (see <xref linkend="sql-syntax-strings">).
 153    </para>
 154
 155    <para>
 156     Arguments to the SQL function are referenced in the function
 157     body using the syntax <literal>$<replaceable>n</></>: <literal>$1</>
 158     refers to the first argument, <literal>$2</> to the second, and so on.
 159     If an argument is of a composite type, then the dot notation,
 160     e.g., <literal>$1.name</literal>, can be used to access attributes
 161     of the argument.  The arguments can only be used as data values,
 162     not as identifiers.  Thus for example this is reasonable:
 163 <programlisting>
 164 INSERT INTO mytable VALUES ($1);
 165 </programlisting>
 166 but this will not work:
 167 <programlisting>
 168 INSERT INTO $1 VALUES (42);
 169 </programlisting>
 170    </para>
 171
 172    <sect2 id="xfunc-sql-base-functions">
 173     <title><acronym>SQL</acronym> Functions on Base Types</title>
 174
 175     <para>
 176      The simplest possible <acronym>SQL</acronym> function has no arguments and
 177      simply returns a base type, such as <type>integer</type>:
 178
 179 <screen>
 180 CREATE FUNCTION one() RETURNS integer AS $$
 181     SELECT 1 AS result;
 182 $$ LANGUAGE SQL;
 183
 184 -- Alternative syntax for string literal:
 185 CREATE FUNCTION one() RETURNS integer AS '
 186     SELECT 1 AS result;
 187 ' LANGUAGE SQL;
 188
 189 SELECT one();
 190
 191  one
 192 -----
 193    1
 194 </screen>
 195     </para>
 196
 197     <para>
 198      Notice that we defined a column alias within the function body for the result of the function
 199      (with  the  name <literal>result</>),  but this column alias is not visible
 200      outside the function.  Hence,  the  result  is labeled <literal>one</>
 201      instead of <literal>result</>.
 202     </para>
 203
 204     <para>
 205      It is almost as easy to define <acronym>SQL</acronym> functions
 206      that take base types as arguments.  In the example below, notice
 207      how we refer to the arguments within the function as <literal>$1</>
 208      and <literal>$2</>.
 209
 210 <screen>
 211 CREATE FUNCTION add_em(integer, integer) RETURNS integer AS $$
 212     SELECT $1 + $2;
 213 $$ LANGUAGE SQL;
 214
 215 SELECT add_em(1, 2) AS answer;
 216
 217  answer
 218 --------
 219       3
 220 </screen>
 221     </para>
 222
 223     <para>
 224      Here is a more useful function, which might be used to debit a
 225      bank account:
 226
 227 <programlisting>
 228 CREATE FUNCTION tf1 (integer, numeric) RETURNS integer AS $$
 229     UPDATE bank
 230         SET balance = balance - $2
 231         WHERE accountno = $1;
 232     SELECT 1;
 233 $$ LANGUAGE SQL;
 234 </programlisting>
 235
 236      A user could execute this function to debit account 17 by $100.00 as
 237      follows:
 238
 239 <programlisting>
 240 SELECT tf1(17, 100.0);
 241 </programlisting>
 242     </para>
 243
 244     <para>
 245      In practice one would probably like a more useful result from the
 246      function than a constant 1, so a more likely definition
 247      is:
 248
 249 <programlisting>
 250 CREATE FUNCTION tf1 (integer, numeric) RETURNS numeric AS $$
 251     UPDATE bank
 252         SET balance = balance - $2
 253         WHERE accountno = $1;
 254     SELECT balance FROM bank WHERE accountno = $1;
 255 $$ LANGUAGE SQL;
 256 </programlisting>
 257
 258      which adjusts the balance and returns the new balance.
 259     </para>
 260    </sect2>
 261
 262    <sect2>
 263     <title><acronym>SQL</acronym> Functions on Composite Types</title>
 264
 265     <para>
 266      When writing  functions with arguments of composite
 267      types, we must  not  only  specify  which
 268      argument  we  want (as we did above with <literal>$1</> and <literal>$2</literal>) but
 269      also the desired attribute (field) of  that  argument.   For  example,
 270      suppose that
 271      <type>emp</type> is a table containing employee data, and therefore
 272      also the name of the composite type of each row of the table.  Here
 273      is a function <function>double_salary</function> that computes what someone's
 274      salary would be if it were doubled:
 275
 276 <screen>
 277 CREATE TABLE emp (
 278     name        text,
 279     salary      numeric,
 280     age         integer,
 281     cubicle     point
 282 );
 283
 284 CREATE FUNCTION double_salary(emp) RETURNS numeric AS $$
 285     SELECT $1.salary * 2 AS salary;
 286 $$ LANGUAGE SQL;
 287
 288 SELECT name, double_salary(emp.*) AS dream
 289     FROM emp
 290     WHERE emp.cubicle ~= point '(2,1)';
 291
 292  name | dream
 293 ------+-------
 294  Bill |  8400
 295 </screen>
 296     </para>
 297
 298     <para>
 299      Notice the use of the syntax <literal>$1.salary</literal>
 300      to select one field of the argument row value.  Also notice
 301      how the calling <command>SELECT</> command uses <literal>*</>
 302      to select
 303      the entire current row of a table as a composite value.  The table
 304      row can alternatively be referenced using just the table name,
 305      like this:
 306 <screen>
 307 SELECT name, double_salary(emp) AS dream
 308     FROM emp
 309     WHERE emp.cubicle ~= point '(2,1)';
 310 </screen>
 311      but this usage is deprecated since it's easy to get confused.
 312     </para>
 313
 314     <para>
 315      Sometimes it is handy to construct a composite argument value
 316      on-the-fly.  This can be done with the <literal>ROW</> construct.
 317      For example, we could adjust the data being passed to the function:
 318 <screen>
 319 SELECT name, double_salary(ROW(name, salary*1.1, age, cubicle)) AS dream
 320     FROM emp;
 321 </screen>
 322     </para>
 323
 324     <para>
 325      It is also possible to build a function that returns a composite type.
 326      This is an example of a function
 327      that returns a single <type>emp</type> row:
 328
 329 <programlisting>
 330 CREATE FUNCTION new_emp() RETURNS emp AS $$
 331     SELECT text 'None' AS name,
 332         1000.0 AS salary,
 333         25 AS age,
 334         point '(2,2)' AS cubicle;
 335 $$ LANGUAGE SQL;
 336 </programlisting>
 337
 338      In this example we have specified each of  the  attributes
 339      with  a  constant value, but any computation
 340      could have been substituted for these constants.
 341     </para>
 342
 343     <para>
 344      Note two important things about defining the function:
 345
 346      <itemizedlist>
 347       <listitem>
 348        <para>
 349         The select list order in the query must be exactly the same as
 350         that in which the columns appear in the table associated
 351         with the composite type.  (Naming the columns, as we did above,
 352         is irrelevant to the system.)
 353        </para>
 354       </listitem>
 355       <listitem>
 356        <para>
 357         You must typecast the expressions to match the
 358         definition of the composite type, or you will get errors like this:
 359 <screen>
 360 <computeroutput>
 361 ERROR:  function declared to return emp returns varchar instead of text at column 1
 362 </computeroutput>
 363 </screen>
 364        </para>
 365       </listitem>
 366      </itemizedlist>
 367     </para>
 368
 369     <para>
 370      A different way to define the same function is:
 371
 372 <programlisting>
 373 CREATE FUNCTION new_emp() RETURNS emp AS $$
 374     SELECT ROW('None', 1000.0, 25, '(2,2)')::emp;
 375 $$ LANGUAGE SQL;
 376 </programlisting>
 377
 378      Here we wrote a <command>SELECT</> that returns just a single
 379      column of the correct composite type.  This isn't really better
 380      in this situation, but it is a handy alternative in some cases
 381      &mdash; for example, if we need to compute the result by calling
 382      another function that returns the desired composite value.
 383     </para>
 384
 385     <para>
 386      We could call this function directly in either of two ways:
 387
 388 <screen>
 389 SELECT new_emp();
 390
 391          new_emp
 392 --------------------------
 393  (None,1000.0,25,"(2,2)")
 394
 395 SELECT * FROM new_emp();
 396
 397  name | salary | age | cubicle
 398 ------+--------+-----+---------
 399  None | 1000.0 |  25 | (2,2)
 400 </screen>
 401
 402      The second way is described more fully in <xref
 403      linkend="xfunc-sql-table-functions">.
 404     </para>
 405
 406     <para>
 407      When you use a function that returns a composite type,
 408      you might want only one field (attribute) from its result.
 409      You can do that with syntax like this:
 410
 411 <screen>
 412 SELECT (new_emp()).name;
 413
 414  name
 415 ------
 416  None
 417 </screen>
 418
 419      The extra parentheses are needed to keep the parser from getting
 420      confused.  If you try to do it without them, you get something like this:
 421
 422 <screen>
 423 SELECT new_emp().name;
 424 ERROR:  syntax error at or near "." at character 17
 425 LINE 1: SELECT new_emp().name;
 426                         ^
 427 </screen>
 428     </para>
 429
 430     <para>
 431      Another option is to use
 432      functional notation for extracting an attribute.  The  simple  way
 433      to explain this is that we can use the
 434      notations <literal>attribute(table)</>  and  <literal>table.attribute</>
 435      interchangeably.
 436
 437 <screen>
 438 SELECT name(new_emp());
 439
 440  name
 441 ------
 442  None
 443 </screen>
 444
 445 <screen>
 446 -- This is the same as:
 447 -- SELECT emp.name AS youngster FROM emp WHERE emp.age &lt; 30;
 448
 449 SELECT name(emp) AS youngster FROM emp WHERE age(emp) &lt; 30;
 450
 451  youngster
 452 -----------
 453  Sam
 454  Andy
 455 </screen>
 456     </para>
 457
 458     <tip>
 459      <para>
 460       The equivalence between functional notation and attribute notation
 461       makes it possible to use functions on composite types to emulate
 462       <quote>computed fields</>.
 463       <indexterm>
 464        <primary>computed field</primary>
 465       </indexterm>
 466       <indexterm>
 467        <primary>field</primary>
 468        <secondary>computed</secondary>
 469       </indexterm>
 470       For example, using the previous definition
 471       for <literal>double_salary(emp)</>, we can write
 472
 473 <screen>
 474 SELECT emp.name, emp.double_salary FROM emp;
 475 </screen>
 476
 477       An application using this wouldn't need to be directly aware that
 478       <literal>double_salary</> isn't a real column of the table.
 479       (You can also emulate computed fields with views.)
 480      </para>
 481     </tip>
 482
 483     <para>
 484      Another way to use a function returning a composite type is to pass the
 485      result to another function that accepts the correct row type as input:
 486
 487 <screen>
 488 CREATE FUNCTION getname(emp) RETURNS text AS $$
 489     SELECT $1.name;
 490 $$ LANGUAGE SQL;
 491
 492 SELECT getname(new_emp());
 493  getname
 494 ---------
 495  None
 496 (1 row)
 497 </screen>
 498     </para>
 499
 500     <para>
 501      Still another way to use a function that returns a composite type is to
 502      call it as a table function, as described in <xref
 503      linkend="xfunc-sql-table-functions">.
 504     </para>
 505    </sect2>
 506
 507    <sect2 id="xfunc-output-parameters">
 508     <title>Functions with Output Parameters</title>
 509
 510    <indexterm>
 511     <primary>function</primary>
 512     <secondary>output parameter</secondary>
 513    </indexterm>
 514
 515     <para>
 516      An alternative way of describing a function's results is to define it
 517      with <firstterm>output parameters</>, as in this example:
 518
 519 <screen>
 520 CREATE FUNCTION add_em (IN x int, IN y int, OUT sum int)
 521 AS 'SELECT $1 + $2'
 522 LANGUAGE SQL;
 523
 524 SELECT add_em(3,7);
 525  add_em
 526 --------
 527      10
 528 (1 row)
 529 </screen>
 530
 531      This is not essentially different from the version of <literal>add_em</>
 532      shown in <xref linkend="xfunc-sql-base-functions">.  The real value of
 533      output parameters is that they provide a convenient way of defining
 534      functions that return several columns.  For example,
 535
 536 <screen>
 537 CREATE FUNCTION sum_n_product (x int, y int, OUT sum int, OUT product int)
 538 AS 'SELECT $1 + $2, $1 * $2'
 539 LANGUAGE SQL;
 540
 541  SELECT * FROM sum_n_product(11,42);
 542  sum | product
 543 -----+---------
 544   53 |     462
 545 (1 row)
 546 </screen>
 547
 548      What has essentially happened here is that we have created an anonymous
 549      composite type for the result of the function.  The above example has
 550      the same end result as
 551
 552 <screen>
 553 CREATE TYPE sum_prod AS (sum int, product int);
 554
 555 CREATE FUNCTION sum_n_product (int, int) RETURNS sum_prod
 556 AS 'SELECT $1 + $2, $1 * $2'
 557 LANGUAGE SQL;
 558 </screen>
 559
 560      but not having to bother with the separate composite type definition
 561      is often handy.
 562     </para>
 563
 564     <para>
 565      Notice that output parameters are not included in the calling argument
 566      list when invoking such a function from SQL.  This is because
 567      <productname>PostgreSQL</productname> considers only the input
 568      parameters to define the function's calling signature.  That means
 569      also that only the input parameters matter when referencing the function
 570      for purposes such as dropping it.  We could drop the above function
 571      with either of
 572
 573 <screen>
 574 DROP FUNCTION sum_n_product (x int, y int, OUT sum int, OUT product int);
 575 DROP FUNCTION sum_n_product (int, int);
 576 </screen>
 577     </para>
 578
 579     <para>
 580      Parameters can be marked as <literal>IN</> (the default),
 581      <literal>OUT</>, or <literal>INOUT</>.  An <literal>INOUT</>
 582      parameter serves as both an input parameter (part of the calling
 583      argument list) and an output parameter (part of the result record type).
 584     </para>
 585    </sect2>
 586
 587    <sect2 id="xfunc-sql-table-functions">
 588     <title><acronym>SQL</acronym> Functions as Table Sources</title>
 589
 590     <para>
 591      All SQL functions can be used in the <literal>FROM</> clause of a query,
 592      but it is particularly useful for functions returning composite types.
 593      If the function is defined to return a base type, the table function
 594      produces a one-column table.  If the function is defined to return
 595      a composite type, the table function produces a column for each attribute
 596      of the composite type.
 597     </para>
 598
 599     <para>
 600      Here is an example:
 601
 602 <screen>
 603 CREATE TABLE foo (fooid int, foosubid int, fooname text);
 604 INSERT INTO foo VALUES (1, 1, 'Joe');
 605 INSERT INTO foo VALUES (1, 2, 'Ed');
 606 INSERT INTO foo VALUES (2, 1, 'Mary');
 607
 608 CREATE FUNCTION getfoo(int) RETURNS foo AS $$
 609     SELECT * FROM foo WHERE fooid = $1;
 610 $$ LANGUAGE SQL;
 611
 612 SELECT *, upper(fooname) FROM getfoo(1) AS t1;
 613
 614  fooid | foosubid | fooname | upper
 615 -------+----------+---------+-------
 616      1 |        1 | Joe     | JOE
 617 (1 row)
 618 </screen>
 619
 620      As the example shows, we can work with the columns of the function's
 621      result just the same as if they were columns of a regular table.
 622     </para>
 623
 624     <para>
 625      Note that we only got one row out of the function.  This is because
 626      we did not use <literal>SETOF</>.  That is described in the next section.
 627     </para>
 628    </sect2>
 629
 630    <sect2>
 631     <title><acronym>SQL</acronym> Functions Returning Sets</title>
 632
 633     <para>
 634      When an SQL function is declared as returning <literal>SETOF
 635      <replaceable>sometype</></literal>, the function's final
 636      <command>SELECT</> query is executed to completion, and each row it
 637      outputs is returned as an element of the result set.
 638     </para>
 639
 640     <para>
 641      This feature is normally used when calling the function in the <literal>FROM</>
 642      clause.  In this case each row returned by the function becomes
 643      a row of the table seen by the query.  For example, assume that
 644      table <literal>foo</> has the same contents as above, and we say:
 645
 646 <programlisting>
 647 CREATE FUNCTION getfoo(int) RETURNS SETOF foo AS $$
 648     SELECT * FROM foo WHERE fooid = $1;
 649 $$ LANGUAGE SQL;
 650
 651 SELECT * FROM getfoo(1) AS t1;
 652 </programlisting>
 653
 654      Then we would get:
 655 <screen>
 656  fooid | foosubid | fooname
 657 -------+----------+---------
 658      1 |        1 | Joe
 659      1 |        2 | Ed
 660 (2 rows)
 661 </screen>
 662     </para>
 663
 664     <para>
 665      It is also possible to return multiple rows with the columns defined by
 666      output parameters, like this:
 667
 668 <programlisting>
 669 CREATE FUNCTION sum_n_product_with_tab (x int, OUT sum int, OUT product int) RETURNS SETOF record AS $$
 670     SELECT x + tab.y, x * tab.y FROM tab;
 671 $$ LANGUAGE SQL;
 672 </programlisting>
 673
 674      The key point here is that you must write <literal>RETURNS SETOF record</>
 675      to indicate that the function returns multiple rows instead of just one.
 676      If there is only one output parameter, write that parameter's type
 677      instead of <type>record</>.
 678     </para>
 679
 680     <para>
 681      Currently, functions returning sets can also be called in the select list
 682      of a query.  For each row that the query
 683      generates by itself, the function returning set is invoked, and an output
 684      row is generated for each element of the function's result set. Note,
 685      however, that this capability is deprecated and might be removed in future
 686      releases. The following is an example function returning a set from the
 687      select list:
 688
 689 <screen>
 690 CREATE FUNCTION listchildren(text) RETURNS SETOF text AS $$
 691     SELECT name FROM nodes WHERE parent = $1
 692 $$ LANGUAGE SQL;
 693
 694 SELECT * FROM nodes;
 695    name    | parent
 696 -----------+--------
 697  Top       |
 698  Child1    | Top
 699  Child2    | Top
 700  Child3    | Top
 701  SubChild1 | Child1
 702  SubChild2 | Child1
 703 (6 rows)
 704
 705 SELECT listchildren('Top');
 706  listchildren
 707 --------------
 708  Child1
 709  Child2
 710  Child3
 711 (3 rows)
 712
 713 SELECT name, listchildren(name) FROM nodes;
 714   name  | listchildren
 715 --------+--------------
 716  Top    | Child1
 717  Top    | Child2
 718  Top    | Child3
 719  Child1 | SubChild1
 720  Child1 | SubChild2
 721 (5 rows)
 722 </screen>
 723
 724      In the last <command>SELECT</command>,
 725      notice that no output row appears for <literal>Child2</>, <literal>Child3</>, etc.
 726      This happens because <function>listchildren</function> returns an empty set
 727      for those arguments, so no result rows are generated.
 728     </para>
 729    </sect2>
 730
 731    <sect2>
 732     <title>Polymorphic <acronym>SQL</acronym> Functions</title>
 733
 734     <para>
 735      <acronym>SQL</acronym> functions can be declared to accept and
 736      return the polymorphic types <type>anyelement</type>,
 737      <type>anyarray</type>, <type>anynonarray</type>, and
 738      <type>anyenum</type>.  See <xref
 739      linkend="extend-types-polymorphic"> for a more detailed
 740      explanation of polymorphic functions. Here is a polymorphic
 741      function <function>make_array</function> that builds up an array
 742      from two arbitrary data type elements:
 743 <screen>
 744 CREATE FUNCTION make_array(anyelement, anyelement) RETURNS anyarray AS $$
 745     SELECT ARRAY[$1, $2];
 746 $$ LANGUAGE SQL;
 747
 748 SELECT make_array(1, 2) AS intarray, make_array('a'::text, 'b') AS textarray;
 749  intarray | textarray
 750 ----------+-----------
 751  {1,2}    | {a,b}
 752 (1 row)
 753 </screen>
 754     </para>
 755
 756     <para>
 757      Notice the use of the typecast <literal>'a'::text</literal>
 758      to specify that the argument is of type <type>text</type>. This is
 759      required if the argument is just a string literal, since otherwise
 760      it would be treated as type
 761      <type>unknown</type>, and array of <type>unknown</type> is not a valid
 762      type.
 763      Without the typecast, you will get errors like this:
 764 <screen>
 765 <computeroutput>
 766 ERROR:  could not determine polymorphic type because input has type "unknown"
 767 </computeroutput>
 768 </screen>
 769     </para>
 770
 771     <para>
 772      It is permitted to have polymorphic arguments with a fixed
 773      return type, but the converse is not. For example:
 774 <screen>
 775 CREATE FUNCTION is_greater(anyelement, anyelement) RETURNS boolean AS $$
 776     SELECT $1 &gt; $2;
 777 $$ LANGUAGE SQL;
 778
 779 SELECT is_greater(1, 2);
 780  is_greater
 781 ------------
 782  f
 783 (1 row)
 784
 785 CREATE FUNCTION invalid_func() RETURNS anyelement AS $$
 786     SELECT 1;
 787 $$ LANGUAGE SQL;
 788 ERROR:  cannot determine result data type
 789 DETAIL:  A function returning a polymorphic type must have at least one polymorphic argument.
 790 </screen>
 791     </para>
 792
 793     <para>
 794      Polymorphism can be used with functions that have output arguments.
 795      For example:
 796 <screen>
 797 CREATE FUNCTION dup (f1 anyelement, OUT f2 anyelement, OUT f3 anyarray)
 798 AS 'select $1, array[$1,$1]' LANGUAGE sql;
 799
 800 SELECT * FROM dup(22);
 801  f2 |   f3
 802 ----+---------
 803  22 | {22,22}
 804 (1 row)
 805 </screen>
 806     </para>
 807    </sect2>
 808   </sect1>
 809
 810   <sect1 id="xfunc-overload">
 811    <title>Function Overloading</title>
 812
 813    <indexterm zone="xfunc-overload">
 814     <primary>overloading</primary>
 815     <secondary>functions</secondary>
 816    </indexterm>
 817
 818    <para>
 819     More than one function can be defined with the same SQL name, so long
 820     as the arguments they take are different.  In other words,
 821     function names can be <firstterm>overloaded</firstterm>.  When a
 822     query is executed, the server will determine which function to
 823     call from the data types and the number of the provided arguments.
 824     Overloading can also be used to simulate functions with a variable
 825     number of arguments, up to a finite maximum number.
 826    </para>
 827
 828    <para>
 829     When creating a family of overloaded functions, one should be
 830     careful not to create ambiguities.  For instance, given the
 831     functions:
 832 <programlisting>
 833 CREATE FUNCTION test(int, real) RETURNS ...
 834 CREATE FUNCTION test(smallint, double precision) RETURNS ...
 835 </programlisting>
 836     it is not immediately clear which function would be called with
 837     some trivial input like <literal>test(1, 1.5)</literal>.  The
 838     currently implemented resolution rules are described in
 839     <xref linkend="typeconv">, but it is unwise to design a system that subtly
 840     relies on this behavior.
 841    </para>
 842
 843    <para>
 844     A function that takes a single argument of a composite type should
 845     generally not have the same name as any attribute (field) of that type.
 846     Recall that <literal>attribute(table)</literal> is considered equivalent
 847     to <literal>table.attribute</literal>.  In the case that there is an
 848     ambiguity between a function on a composite type and an attribute of
 849     the composite type, the attribute will always be used.  It is possible
 850     to override that choice by schema-qualifying the function name
 851     (that is, <literal>schema.func(table)</literal>) but it's better to
 852     avoid the problem by not choosing conflicting names.
 853    </para>
 854
 855    <para>
 856     When overloading C-language functions, there is an additional
 857     constraint: The C name of each function in the family of
 858     overloaded functions must be different from the C names of all
 859     other functions, either internal or dynamically loaded.  If this
 860     rule is violated, the behavior is not portable.  You might get a
 861     run-time linker error, or one of the functions will get called
 862     (usually the internal one).  The alternative form of the
 863     <literal>AS</> clause for the SQL <command>CREATE
 864     FUNCTION</command> command decouples the SQL function name from
 865     the function name in the C source code.  For instance:
 866 <programlisting>
 867 CREATE FUNCTION test(int) RETURNS int
 868     AS '<replaceable>filename</>', 'test_1arg'
 869     LANGUAGE C;
 870 CREATE FUNCTION test(int, int) RETURNS int
 871     AS '<replaceable>filename</>', 'test_2arg'
 872     LANGUAGE C;
 873 </programlisting>
 874     The names of the C functions here reflect one of many possible conventions.
 875    </para>
 876   </sect1>
 877
 878   <sect1 id="xfunc-volatility">
 879    <title>Function Volatility Categories</title>
 880
 881    <indexterm zone="xfunc-volatility">
 882     <primary>volatility</primary>
 883     <secondary>functions</secondary>
 884    </indexterm>
 885    <indexterm zone="xfunc-volatility">
 886     <primary>VOLATILE</primary>
 887    </indexterm>
 888    <indexterm zone="xfunc-volatility">
 889     <primary>STABLE</primary>
 890    </indexterm>
 891    <indexterm zone="xfunc-volatility">
 892     <primary>IMMUTABLE</primary>
 893    </indexterm>
 894
 895    <para>
 896     Every function has a <firstterm>volatility</> classification, with
 897     the possibilities being <literal>VOLATILE</>, <literal>STABLE</>, or
 898     <literal>IMMUTABLE</>.  <literal>VOLATILE</> is the default if the
 899     <xref linkend="sql-createfunction" endterm="sql-createfunction-title">
 900     command does not specify a category.  The volatility category is a
 901     promise to the optimizer about the behavior of the function:
 902
 903    <itemizedlist>
 904     <listitem>
 905      <para>
 906       A <literal>VOLATILE</> function can do anything, including modifying
 907       the database.  It can return different results on successive calls with
 908       the same arguments.  The optimizer makes no assumptions about the
 909       behavior of such functions.  A query using a volatile function will
 910       re-evaluate the function at every row where its value is needed.
 911      </para>
 912     </listitem>
 913     <listitem>
 914      <para>
 915       A <literal>STABLE</> function cannot modify the database and is
 916       guaranteed to return the same results given the same arguments
 917       for all rows within a single statement. This category allows the
 918       optimizer to optimize multiple calls of the function to a single
 919       call. In particular, it is safe to use an expression containing
 920       such a function in an index scan condition. (Since an index scan
 921       will evaluate the comparison value only once, not once at each
 922       row, it is not valid to use a <literal>VOLATILE</> function in an
 923       index scan condition.)
 924      </para>
 925     </listitem>
 926     <listitem>
 927      <para>
 928       An <literal>IMMUTABLE</> function cannot modify the database and is
 929       guaranteed to return the same results given the same arguments forever.
 930       This category allows the optimizer to pre-evaluate the function when
 931       a query calls it with constant arguments.  For example, a query like
 932       <literal>SELECT ... WHERE x = 2 + 2</> can be simplified on sight to
 933       <literal>SELECT ... WHERE x = 4</>, because the function underlying
 934       the integer addition operator is marked <literal>IMMUTABLE</>.
 935      </para>
 936     </listitem>
 937    </itemizedlist>
 938    </para>
 939
 940    <para>
 941     For best optimization results, you should label your functions with the
 942     strictest volatility category that is valid for them.
 943    </para>
 944
 945    <para>
 946     Any function with side-effects <emphasis>must</> be labeled
 947     <literal>VOLATILE</>, so that calls to it cannot be optimized away.
 948     Even a function with no side-effects needs to be labeled
 949     <literal>VOLATILE</> if its value can change within a single query;
 950     some examples are <literal>random()</>, <literal>currval()</>,
 951     <literal>timeofday()</>.
 952    </para>
 953
 954    <para>
 955     There is relatively little difference between <literal>STABLE</> and
 956     <literal>IMMUTABLE</> categories when considering simple interactive
 957     queries that are planned and immediately executed: it doesn't matter
 958     a lot whether a function is executed once during planning or once during
 959     query execution startup.  But there is a big difference if the plan is
 960     saved and reused later.  Labeling a function <literal>IMMUTABLE</> when
 961     it really isn't might allow it to be prematurely folded to a constant during
 962     planning, resulting in a stale value being re-used during subsequent uses
 963     of the plan.  This is a hazard when using prepared statements or when
 964     using function languages that cache plans (such as
 965     <application>PL/pgSQL</>).
 966    </para>
 967
 968    <para>
 969     Because of the snapshotting behavior of MVCC (see <xref linkend="mvcc">)
 970     a function containing only <command>SELECT</> commands can safely be
 971     marked <literal>STABLE</>, even if it selects from tables that might be
 972     undergoing modifications by concurrent queries.
 973     <productname>PostgreSQL</productname> will execute a <literal>STABLE</>
 974     function using the snapshot established for the calling query, and so it
 975     will see a fixed view of the database throughout that query.
 976     Also note
 977     that the <function>current_timestamp</> family of functions qualify
 978     as stable, since their values do not change within a transaction.
 979    </para>
 980
 981    <para>
 982     The same snapshotting behavior is used for <command>SELECT</> commands
 983     within <literal>IMMUTABLE</> functions.  It is generally unwise to select
 984     from database tables within an <literal>IMMUTABLE</> function at all,
 985     since the immutability will be broken if the table contents ever change.
 986     However, <productname>PostgreSQL</productname> does not enforce that you
 987     do not do that.
 988    </para>
 989
 990    <para>
 991     A common error is to label a function <literal>IMMUTABLE</> when its
 992     results depend on a configuration parameter.  For example, a function
 993     that manipulates timestamps might well have results that depend on the
 994     <xref linkend="guc-timezone"> setting.  For safety, such functions should
 995     be labeled <literal>STABLE</> instead.
 996    </para>
 997
 998    <note>
 999     <para>
1000      Before <productname>PostgreSQL</productname> release 8.0, the requirement
1001      that <literal>STABLE</> and <literal>IMMUTABLE</> functions cannot modify
1002      the database was not enforced by the system.  Release 8.0 enforces it
1003      by requiring SQL functions and procedural language functions of these
1004      categories to contain no SQL commands other than <command>SELECT</>.
1005      (This is not a completely bulletproof test, since such functions could
1006      still call <literal>VOLATILE</> functions that modify the database.
1007      If you do that, you will find that the <literal>STABLE</> or
1008      <literal>IMMUTABLE</> function does not notice the database changes
1009      applied by the called function.)
1010     </para>
1011    </note>
1012   </sect1>
1013
1014   <sect1 id="xfunc-pl">
1015    <title>Procedural Language Functions</title>
1016
1017    <para>
1018     <productname>PostgreSQL</productname> allows user-defined functions
1019     to be written in other languages besides SQL and C.  These other
1020     languages are generically called <firstterm>procedural
1021     languages</firstterm> (<acronym>PL</>s).
1022     Procedural languages aren't built into the
1023     <productname>PostgreSQL</productname> server; they are offered
1024     by loadable modules.
1025     See <xref linkend="xplang"> and following chapters for more
1026     information.
1027    </para>
1028   </sect1>
1029
1030   <sect1 id="xfunc-internal">
1031    <title>Internal Functions</title>
1032
1033    <indexterm zone="xfunc-internal"><primary>function</><secondary>internal</></>
1034
1035    <para>
1036     Internal functions are functions written in C that have been statically
1037     linked into the <productname>PostgreSQL</productname> server.
1038     The <quote>body</quote> of the function definition
1039     specifies the C-language name of the function, which need not be the
1040     same as the name being declared for SQL use.
1041     (For reasons of backwards compatibility, an empty body
1042     is accepted as meaning that the C-language function name is the
1043     same as the SQL name.)
1044    </para>
1045
1046    <para>
1047     Normally, all internal functions present in the
1048     server are declared during the initialization of the database cluster (<command>initdb</command>),
1049     but a user could use <command>CREATE FUNCTION</command>
1050     to create additional alias names for an internal function.
1051     Internal functions are declared in <command>CREATE FUNCTION</command>
1052     with language name <literal>internal</literal>.  For instance, to
1053     create an alias for the <function>sqrt</function> function:
1054 <programlisting>
1055 CREATE FUNCTION square_root(double precision) RETURNS double precision
1056     AS 'dsqrt'
1057     LANGUAGE internal
1058     STRICT;
1059 </programlisting>
1060     (Most internal functions expect to be declared <quote>strict</quote>.)
1061    </para>
1062
1063    <note>
1064     <para>
1065      Not all <quote>predefined</quote> functions are
1066      <quote>internal</quote> in the above sense.  Some predefined
1067      functions are written in SQL.
1068     </para>
1069    </note>
1070   </sect1>
1071
1072   <sect1 id="xfunc-c">
1073    <title>C-Language Functions</title>
1074
1075    <indexterm zone="xfunc-c">
1076     <primary>function</primary>
1077     <secondary>user-defined</secondary>
1078     <tertiary>in C</tertiary>
1079    </indexterm>
1080
1081    <para>
1082     User-defined functions can be written in C (or a language that can
1083     be made compatible with C, such as C++).  Such functions are
1084     compiled into dynamically loadable objects (also called shared
1085     libraries) and are loaded by the server on demand.  The dynamic
1086     loading feature is what distinguishes <quote>C language</> functions
1087     from <quote>internal</> functions &mdash; the actual coding conventions
1088     are essentially the same for both.  (Hence, the standard internal
1089     function library is a rich source of coding examples for user-defined
1090     C functions.)
1091    </para>
1092
1093    <para>
1094     Two different calling conventions are currently used for C functions.
1095     The newer <quote>version 1</quote> calling convention is indicated by writing
1096     a <literal>PG_FUNCTION_INFO_V1()</literal> macro call for the function,
1097     as illustrated below.  Lack of such a macro indicates an old-style
1098     (<quote>version 0</quote>) function.  The language name specified in <command>CREATE FUNCTION</command>
1099     is <literal>C</literal> in either case.  Old-style functions are now deprecated
1100     because of portability problems and lack of functionality, but they
1101     are still supported for compatibility reasons.
1102    </para>
1103
1104   <sect2 id="xfunc-c-dynload">
1105    <title>Dynamic Loading</title>
1106
1107    <indexterm zone="xfunc-c-dynload">
1108     <primary>dynamic loading</primary>
1109    </indexterm>
1110
1111    <para>
1112     The first time a user-defined function in a particular
1113     loadable object file is called in a session,
1114     the dynamic loader loads that object file into memory so that the
1115     function can be called.  The <command>CREATE FUNCTION</command>
1116     for a user-defined C function must therefore specify two pieces of
1117     information for the function: the name of the loadable
1118     object file, and the C name (link symbol) of the specific function to call
1119     within that object file.  If the C name is not explicitly specified then
1120     it is assumed to be the same as the SQL function name.
1121    </para>
1122
1123    <para>
1124     The following algorithm is used to locate the shared object file
1125     based on the name given in the <command>CREATE FUNCTION</command>
1126     command:
1127
1128     <orderedlist>
1129      <listitem>
1130       <para>
1131        If the name is an absolute path, the given file is loaded.
1132       </para>
1133      </listitem>
1134
1135      <listitem>
1136       <para>
1137        If the name starts with the string <literal>$libdir</literal>,
1138        that part is replaced by the <productname>PostgreSQL</> package
1139         library directory
1140        name, which is determined at build time.<indexterm><primary>$libdir</></>
1141       </para>
1142      </listitem>
1143
1144      <listitem>
1145       <para>
1146        If the name does not contain a directory part, the file is
1147        searched for in the path specified by the configuration variable
1148        <xref linkend="guc-dynamic-library-path">.<indexterm><primary>dynamic_library_path</></>
1149       </para>
1150      </listitem>
1151
1152      <listitem>
1153       <para>
1154        Otherwise (the file was not found in the path, or it contains a
1155        non-absolute directory part), the dynamic loader will try to
1156        take the name as given, which will most likely fail.  (It is
1157        unreliable to depend on the current working directory.)
1158       </para>
1159      </listitem>
1160     </orderedlist>
1161
1162     If this sequence does not work, the platform-specific shared
1163     library file name extension (often <filename>.so</filename>) is
1164     appended to the given name and this sequence is tried again.  If
1165     that fails as well, the load will fail.
1166    </para>
1167
1168    <para>
1169     It is recommended to locate shared libraries either relative to
1170     <literal>$libdir</literal> or through the dynamic library path.
1171     This simplifies version upgrades if the new installation is at a
1172     different location.  The actual directory that
1173     <literal>$libdir</literal> stands for can be found out with the
1174     command <literal>pg_config --pkglibdir</literal>.
1175    </para>
1176
1177    <para>
1178     The user ID the <productname>PostgreSQL</productname> server runs
1179     as must be able to traverse the path to the file you intend to
1180     load.  Making the file or a higher-level directory not readable
1181     and/or not executable by the <systemitem>postgres</systemitem>
1182     user is a common mistake.
1183    </para>
1184
1185    <para>
1186     In any case, the file name that is given in the
1187     <command>CREATE FUNCTION</command> command is recorded literally
1188     in the system catalogs, so if the file needs to be loaded again
1189     the same procedure is applied.
1190    </para>
1191
1192    <note>
1193     <para>
1194      <productname>PostgreSQL</productname> will not compile a C function
1195      automatically.  The object file must be compiled before it is referenced
1196      in a <command>CREATE
1197      FUNCTION</> command.  See <xref linkend="dfunc"> for additional
1198      information.
1199     </para>
1200    </note>
1201
1202    <indexterm zone="xfunc-c-dynload">
1203     <primary>magic block</primary>
1204    </indexterm>
1205
1206    <para>
1207     To ensure that a dynamically loaded object file is not loaded into an
1208     incompatible server, <productname>PostgreSQL</productname> checks that the
1209     file contains a <quote>magic block</> with the appropriate contents.
1210     This allows the server to detect obvious incompatibilities, such as code
1211     compiled for a different major version of
1212     <productname>PostgreSQL</productname>.  A magic block is required as of
1213     <productname>PostgreSQL</productname> 8.2.  To include a magic block,
1214     write this in one (and only one) of the module source files, after having
1215     included the header <filename>fmgr.h</>:
1216
1217 <programlisting>
1218 #ifdef PG_MODULE_MAGIC
1219 PG_MODULE_MAGIC;
1220 #endif
1221 </programlisting>
1222
1223     The <literal>#ifdef</> test can be omitted if the code doesn't
1224     need to compile against pre-8.2 <productname>PostgreSQL</productname>
1225     releases.
1226    </para>
1227
1228    <para>
1229     After it is used for the first time, a dynamically loaded object
1230     file is retained in memory.  Future calls in the same session to
1231     the function(s) in that file will only incur the small overhead of
1232     a symbol table lookup.  If you need to force a reload of an object
1233     file, for example after recompiling it, use the <xref
1234     linkend="sql-load" endterm="sql-load-title"> command or begin a
1235     fresh session.
1236    </para>
1237
1238    <indexterm zone="xfunc-c-dynload">
1239     <primary>_PG_init</primary>
1240    </indexterm>
1241    <indexterm zone="xfunc-c-dynload">
1242     <primary>_PG_fini</primary>
1243    </indexterm>
1244    <indexterm zone="xfunc-c-dynload">
1245     <primary>library initialization function</primary>
1246    </indexterm>
1247    <indexterm zone="xfunc-c-dynload">
1248     <primary>library finalization function</primary>
1249    </indexterm>
1250
1251    <para>
1252     Optionally, a dynamically loaded file can contain initialization and
1253     finalization functions.  If the file includes a function named
1254     <function>_PG_init</>, that function will be called immediately after
1255     loading the file.  The function receives no parameters and should
1256     return void.  If the file includes a function named
1257     <function>_PG_fini</>, that function will be called immediately before
1258     unloading the file.  Likewise, the function receives no parameters and
1259     should return void.  Note that <function>_PG_fini</> will only be called
1260     during an unload of the file, not during process termination.
1261     (Presently, an unload only happens in the context of re-loading
1262     the file due to an explicit <command>LOAD</> command.)
1263    </para>
1264
1265   </sect2>
1266
1267    <sect2 id="xfunc-c-basetype">
1268     <title>Base Types in C-Language Functions</title>
1269
1270     <indexterm zone="xfunc-c-basetype">
1271      <primary>data type</primary>
1272      <secondary>internal organization</secondary>
1273     </indexterm>
1274
1275     <para>
1276      To know how to write C-language functions, you need to know how
1277      <productname>PostgreSQL</productname> internally represents base
1278      data types and how they can be passed to and from functions.
1279      Internally, <productname>PostgreSQL</productname> regards a base
1280      type as a <quote>blob of memory</quote>.  The user-defined
1281      functions that you define over a type in turn define the way that
1282      <productname>PostgreSQL</productname> can operate on it.  That
1283      is, <productname>PostgreSQL</productname> will only store and
1284      retrieve the data from disk and use your user-defined functions
1285      to input, process, and output the data.
1286     </para>
1287
1288     <para>
1289      Base types can have one of three internal formats:
1290
1291      <itemizedlist>
1292       <listitem>
1293        <para>
1294         pass by value, fixed-length
1295        </para>
1296       </listitem>
1297       <listitem>
1298        <para>
1299         pass by reference, fixed-length
1300        </para>
1301       </listitem>
1302       <listitem>
1303        <para>
1304         pass by reference, variable-length
1305        </para>
1306       </listitem>
1307      </itemizedlist>
1308     </para>
1309
1310     <para>
1311      By-value  types  can  only be 1, 2, or 4 bytes in length
1312      (also 8 bytes, if <literal>sizeof(Datum)</literal> is 8 on your machine).
1313      You should be careful
1314      to define your types such that  they  will  be  the  same
1315      size (in bytes) on all architectures.  For example, the
1316      <literal>long</literal> type is dangerous because  it
1317      is 4 bytes on some machines and 8 bytes on others, whereas
1318      <type>int</type>  type  is  4  bytes  on  most
1319      Unix machines.  A reasonable implementation of
1320      the  <type>int4</type>  type  on  Unix
1321      machines might be:
1322
1323 <programlisting>
1324 /* 4-byte integer, passed by value */
1325 typedef int int4;
1326 </programlisting>
1327     </para>
1328
1329     <para>
1330      On  the  other hand, fixed-length types of any size can
1331      be passed by-reference.  For example, here is a  sample
1332      implementation of a <productname>PostgreSQL</productname> type:
1333
1334 <programlisting>
1335 /* 16-byte structure, passed by reference */
1336 typedef struct
1337 {
1338     double  x, y;
1339 } Point;
1340 </programlisting>
1341
1342      Only  pointers  to  such types can be used when passing
1343      them in and out of <productname>PostgreSQL</productname> functions.
1344      To return a value of such a type, allocate the right amount of
1345      memory with <literal>palloc</literal>, fill in the allocated memory,
1346      and return a pointer to it.  (Also, if you just want to return the
1347      same value as one of your input arguments that's of the same data type,
1348      you can skip the extra <literal>palloc</literal> and just return the
1349      pointer to the input value.)
1350     </para>
1351
1352     <para>
1353      Finally, all variable-length types must also be  passed
1354      by  reference.   All  variable-length  types must begin
1355      with a length field of exactly 4 bytes, and all data to
1356      be  stored within that type must be located in the memory
1357      immediately  following  that  length  field.   The
1358      length field contains the total length of the structure,
1359      that is,  it  includes  the  size  of  the  length  field
1360      itself.
1361     </para>
1362
1363     <warning>
1364      <para>
1365       <emphasis>Never</> modify the contents of a pass-by-reference input
1366       value.  If you do so you are likely to corrupt on-disk data, since
1367       the pointer you are given might point directly into a disk buffer.
1368       The sole exception to this rule is explained in
1369       <xref linkend="xaggr">.
1370      </para>
1371     </warning>
1372
1373     <para>
1374      As an example, we can define the type <type>text</type> as
1375      follows:
1376
1377 <programlisting>
1378 typedef struct {
1379     int4 length;
1380     char data[1];
1381 } text;
1382 </programlisting>
1383
1384      Obviously,  the  data  field declared here is not long enough to hold
1385      all possible strings.  Since it's impossible to declare a variable-size
1386      structure in <acronym>C</acronym>, we rely on the knowledge that the
1387      <acronym>C</acronym> compiler won't range-check array subscripts.  We
1388      just allocate the necessary amount of space and then access the array as
1389      if it were declared the right length.  (This is a common trick, which
1390      you can read about in many textbooks about C.)
1391     </para>
1392
1393     <para>
1394      When manipulating
1395      variable-length types, we must  be  careful  to  allocate
1396      the  correct amount  of memory and set the length field correctly.
1397      For example, if we wanted to  store  40  bytes  in  a <structname>text</>
1398      structure, we might use a code fragment like this:
1399
1400 <programlisting>
1401 #include "postgres.h"
1402 ...
1403 char buffer[40]; /* our source data */
1404 ...
1405 text *destination = (text *) palloc(VARHDRSZ + 40);
1406 destination-&gt;length = VARHDRSZ + 40;
1407 memcpy(destination-&gt;data, buffer, 40);
1408 ...
1409 </programlisting>
1410
1411      <literal>VARHDRSZ</> is the same as <literal>sizeof(int4)</>, but
1412      it's considered good style to use the macro <literal>VARHDRSZ</>
1413      to refer to the size of the overhead for a variable-length type.
1414     </para>
1415
1416     <para>
1417      <xref linkend="xfunc-c-type-table"> specifies which C type
1418      corresponds to which SQL type when writing a C-language function
1419      that uses a built-in type of <productname>PostgreSQL</>.
1420      The <quote>Defined In</quote> column gives the header file that
1421      needs to be included to get the type definition.  (The actual
1422      definition might be in a different file that is included by the
1423      listed file.  It is recommended that users stick to the defined
1424      interface.)  Note that you should always include
1425      <filename>postgres.h</filename> first in any source file, because
1426      it declares a number of things that you will need anyway.
1427     </para>
1428
1429      <table tocentry="1" id="xfunc-c-type-table">
1430       <title>Equivalent C Types for Built-In SQL Types</title>
1431       <tgroup cols="3">
1432        <thead>
1433         <row>
1434          <entry>
1435           SQL Type
1436          </entry>
1437          <entry>
1438           C Type
1439          </entry>
1440          <entry>
1441           Defined In
1442          </entry>
1443         </row>
1444        </thead>
1445        <tbody>
1446         <row>
1447          <entry><type>abstime</type></entry>
1448          <entry><type>AbsoluteTime</type></entry>
1449          <entry><filename>utils/nabstime.h</filename></entry>
1450         </row>
1451         <row>
1452          <entry><type>boolean</type></entry>
1453          <entry><type>bool</type></entry>
1454          <entry><filename>postgres.h</filename> (maybe compiler built-in)</entry>
1455         </row>
1456         <row>
1457          <entry><type>box</type></entry>
1458          <entry><type>BOX*</type></entry>
1459          <entry><filename>utils/geo_decls.h</filename></entry>
1460         </row>
1461         <row>
1462          <entry><type>bytea</type></entry>
1463          <entry><type>bytea*</type></entry>
1464          <entry><filename>postgres.h</filename></entry>
1465         </row>
1466         <row>
1467          <entry><type>"char"</type></entry>
1468          <entry><type>char</type></entry>
1469          <entry>(compiler built-in)</entry>
1470         </row>
1471         <row>
1472          <entry><type>character</type></entry>
1473          <entry><type>BpChar*</type></entry>
1474          <entry><filename>postgres.h</filename></entry>
1475         </row>
1476         <row>
1477          <entry><type>cid</type></entry>
1478          <entry><type>CommandId</type></entry>
1479          <entry><filename>postgres.h</filename></entry>
1480         </row>
1481         <row>
1482          <entry><type>date</type></entry>
1483          <entry><type>DateADT</type></entry>
1484          <entry><filename>utils/date.h</filename></entry>
1485         </row>
1486         <row>
1487          <entry><type>smallint</type> (<type>int2</type>)</entry>
1488          <entry><type>int2</type> or <type>int16</type></entry>
1489          <entry><filename>postgres.h</filename></entry>
1490         </row>
1491         <row>
1492          <entry><type>int2vector</type></entry>
1493          <entry><type>int2vector*</type></entry>
1494          <entry><filename>postgres.h</filename></entry>
1495         </row>
1496         <row>
1497          <entry><type>integer</type> (<type>int4</type>)</entry>
1498          <entry><type>int4</type> or <type>int32</type></entry>
1499          <entry><filename>postgres.h</filename></entry>
1500         </row>
1501         <row>
1502          <entry><type>real</type> (<type>float4</type>)</entry>
1503          <entry><type>float4*</type></entry>
1504         <entry><filename>postgres.h</filename></entry>
1505         </row>
1506         <row>
1507          <entry><type>double precision</type> (<type>float8</type>)</entry>
1508          <entry><type>float8*</type></entry>
1509          <entry><filename>postgres.h</filename></entry>
1510         </row>
1511         <row>
1512          <entry><type>interval</type></entry>
1513          <entry><type>Interval*</type></entry>
1514          <entry><filename>utils/timestamp.h</filename></entry>
1515         </row>
1516         <row>
1517          <entry><type>lseg</type></entry>
1518          <entry><type>LSEG*</type></entry>
1519          <entry><filename>utils/geo_decls.h</filename></entry>
1520         </row>
1521         <row>
1522          <entry><type>name</type></entry>
1523          <entry><type>Name</type></entry>
1524          <entry><filename>postgres.h</filename></entry>
1525         </row>
1526         <row>
1527          <entry><type>oid</type></entry>
1528          <entry><type>Oid</type></entry>
1529          <entry><filename>postgres.h</filename></entry>
1530         </row>
1531         <row>
1532          <entry><type>oidvector</type></entry>
1533          <entry><type>oidvector*</type></entry>
1534          <entry><filename>postgres.h</filename></entry>
1535         </row>
1536         <row>
1537          <entry><type>path</type></entry>
1538          <entry><type>PATH*</type></entry>
1539          <entry><filename>utils/geo_decls.h</filename></entry>
1540         </row>
1541         <row>
1542          <entry><type>point</type></entry>
1543          <entry><type>POINT*</type></entry>
1544          <entry><filename>utils/geo_decls.h</filename></entry>
1545         </row>
1546         <row>
1547          <entry><type>regproc</type></entry>
1548          <entry><type>regproc</type></entry>
1549          <entry><filename>postgres.h</filename></entry>
1550         </row>
1551         <row>
1552          <entry><type>reltime</type></entry>
1553          <entry><type>RelativeTime</type></entry>
1554          <entry><filename>utils/nabstime.h</filename></entry>
1555         </row>
1556         <row>
1557          <entry><type>text</type></entry>
1558          <entry><type>text*</type></entry>
1559          <entry><filename>postgres.h</filename></entry>
1560         </row>
1561         <row>
1562          <entry><type>tid</type></entry>
1563          <entry><type>ItemPointer</type></entry>
1564          <entry><filename>storage/itemptr.h</filename></entry>
1565         </row>
1566         <row>
1567          <entry><type>time</type></entry>
1568          <entry><type>TimeADT</type></entry>
1569          <entry><filename>utils/date.h</filename></entry>
1570         </row>
1571         <row>
1572          <entry><type>time with time zone</type></entry>
1573          <entry><type>TimeTzADT</type></entry>
1574          <entry><filename>utils/date.h</filename></entry>
1575         </row>
1576         <row>
1577          <entry><type>timestamp</type></entry>
1578          <entry><type>Timestamp*</type></entry>
1579          <entry><filename>utils/timestamp.h</filename></entry>
1580         </row>
1581         <row>
1582          <entry><type>tinterval</type></entry>
1583          <entry><type>TimeInterval</type></entry>
1584          <entry><filename>utils/nabstime.h</filename></entry>
1585         </row>
1586         <row>
1587          <entry><type>varchar</type></entry>
1588          <entry><type>VarChar*</type></entry>
1589          <entry><filename>postgres.h</filename></entry>
1590         </row>
1591         <row>
1592          <entry><type>xid</type></entry>
1593          <entry><type>TransactionId</type></entry>
1594          <entry><filename>postgres.h</filename></entry>
1595         </row>
1596        </tbody>
1597       </tgroup>
1598      </table>
1599
1600     <para>
1601      Now that we've gone over all of the possible structures
1602      for base types, we can show some examples of real functions.
1603     </para>
1604    </sect2>
1605
1606    <sect2>
1607     <title>Version 0 Calling Conventions</title>
1608
1609     <para>
1610      We present the <quote>old style</quote> calling convention first &mdash; although
1611      this approach is now deprecated, it's easier to get a handle on
1612      initially.  In the version-0 method, the arguments and result
1613      of the C function are just declared in normal C style, but being
1614      careful to use the C representation of each SQL data type as shown
1615      above.
1616     </para>
1617
1618     <para>
1619      Here are some examples:
1620
1621 <programlisting>
1622 #include "postgres.h"
1623 #include &lt;string.h&gt;
1624
1625 /* by value */
1626
1627 int
1628 add_one(int arg)
1629 {
1630     return arg + 1;
1631 }
1632
1633 /* by reference, fixed length */
1634
1635 float8 *
1636 add_one_float8(float8 *arg)
1637 {
1638     float8    *result = (float8 *) palloc(sizeof(float8));
1639
1640     *result = *arg + 1.0;
1641
1642     return result;
1643 }
1644
1645 Point *
1646 makepoint(Point *pointx, Point *pointy)
1647 {
1648     Point     *new_point = (Point *) palloc(sizeof(Point));
1649
1650     new_point-&gt;x = pointx-&gt;x;
1651     new_point-&gt;y = pointy-&gt;y;
1652
1653     return new_point;
1654 }
1655
1656 /* by reference, variable length */
1657
1658 text *
1659 copytext(text *t)
1660 {
1661     /*
1662      * VARSIZE is the total size of the struct in bytes.
1663      */
1664     text *new_t = (text *) palloc(VARSIZE(t));
1665     SET_VARSIZE(new_t, VARSIZE(t));
1666     /*
1667      * VARDATA is a pointer to the data region of the struct.
1668      */
1669     memcpy((void *) VARDATA(new_t), /* destination */
1670            (void *) VARDATA(t),     /* source */
1671            VARSIZE(t) - VARHDRSZ);  /* how many bytes */
1672     return new_t;
1673 }
1674
1675 text *
1676 concat_text(text *arg1, text *arg2)
1677 {
1678     int32 new_text_size = VARSIZE(arg1) + VARSIZE(arg2) - VARHDRSZ;
1679     text *new_text = (text *) palloc(new_text_size);
1680
1681     SET_VARSIZE(new_text, new_text_size);
1682     memcpy(VARDATA(new_text), VARDATA(arg1), VARSIZE(arg1) - VARHDRSZ);
1683     memcpy(VARDATA(new_text) + (VARSIZE(arg1) - VARHDRSZ),
1684            VARDATA(arg2), VARSIZE(arg2) - VARHDRSZ);
1685     return new_text;
1686 }
1687 </programlisting>
1688     </para>
1689
1690     <para>
1691      Supposing that the above code has been prepared in file
1692      <filename>funcs.c</filename> and compiled into a shared object,
1693      we could define the functions to <productname>PostgreSQL</productname>
1694      with commands like this:
1695
1696 <programlisting>
1697 CREATE FUNCTION add_one(integer) RETURNS integer
1698      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'add_one'
1699      LANGUAGE C STRICT;
1700
1701 -- note overloading of SQL function name "add_one"
1702 CREATE FUNCTION add_one(double precision) RETURNS double precision
1703      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'add_one_float8'
1704      LANGUAGE C STRICT;
1705
1706 CREATE FUNCTION makepoint(point, point) RETURNS point
1707      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'makepoint'
1708      LANGUAGE C STRICT;
1709
1710 CREATE FUNCTION copytext(text) RETURNS text
1711      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'copytext'
1712      LANGUAGE C STRICT;
1713
1714 CREATE FUNCTION concat_text(text, text) RETURNS text
1715      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'concat_text'
1716      LANGUAGE C STRICT;
1717 </programlisting>
1718     </para>
1719
1720     <para>
1721      Here, <replaceable>DIRECTORY</replaceable> stands for the
1722      directory of the shared library file (for instance the
1723      <productname>PostgreSQL</productname> tutorial directory, which
1724      contains the code for the examples used in this section).
1725      (Better style would be to use just <literal>'funcs'</> in the
1726      <literal>AS</> clause, after having added
1727      <replaceable>DIRECTORY</replaceable> to the search path.  In any
1728      case, we can omit the system-specific extension for a shared
1729      library, commonly <literal>.so</literal> or
1730      <literal>.sl</literal>.)
1731     </para>
1732
1733     <para>
1734      Notice that we have specified the functions as <quote>strict</quote>,
1735      meaning that
1736      the system should automatically assume a null result if any input
1737      value is null.  By doing this, we avoid having to check for null inputs
1738      in the function code.  Without this, we'd have to check for null values
1739      explicitly, by checking for a null pointer for each
1740      pass-by-reference argument.  (For pass-by-value arguments, we don't
1741      even have a way to check!)
1742     </para>
1743
1744     <para>
1745      Although this calling convention is simple to use,
1746      it is not very portable; on some architectures there are problems
1747      with passing data types that are smaller than <type>int</type> this way.  Also, there is
1748      no simple way to return a null result, nor to cope with null arguments
1749      in any way other than making the function strict.  The version-1
1750      convention, presented next, overcomes these objections.
1751     </para>
1752    </sect2>
1753
1754    <sect2>
1755     <title>Version 1 Calling Conventions</title>
1756
1757     <para>
1758      The version-1 calling convention relies on macros to suppress most
1759      of the complexity of passing arguments and results.  The C declaration
1760      of a version-1 function is always:
1761 <programlisting>
1762 Datum funcname(PG_FUNCTION_ARGS)
1763 </programlisting>
1764      In addition, the macro call:
1765 <programlisting>
1766 PG_FUNCTION_INFO_V1(funcname);
1767 </programlisting>
1768      must appear in the same source file.  (Conventionally. it's
1769      written just before the function itself.)  This macro call is not
1770      needed for <literal>internal</>-language functions, since
1771      <productname>PostgreSQL</> assumes that all internal functions
1772      use the version-1 convention.  It is, however, required for
1773      dynamically-loaded functions.
1774     </para>
1775
1776     <para>
1777      In a version-1 function, each actual argument is fetched using a
1778      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function>
1779      macro that corresponds to the argument's data type, and the
1780      result is returned using a
1781      <function>PG_RETURN_<replaceable>xxx</replaceable>()</function>
1782      macro for the return type.
1783      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function>
1784      takes as its argument the number of the function argument to
1785      fetch, where the count starts at 0.
1786      <function>PG_RETURN_<replaceable>xxx</replaceable>()</function>
1787      takes as its argument the actual value to return.
1788     </para>
1789
1790     <para>
1791      Here we show the same functions as above, coded in version-1 style:
1792
1793 <programlisting>
1794 #include "postgres.h"
1795 #include &lt;string.h&gt;
1796 #include "fmgr.h"
1797
1798 /* by value */
1799
1800 PG_FUNCTION_INFO_V1(add_one);
1801
1802 Datum
1803 add_one(PG_FUNCTION_ARGS)
1804 {
1805     int32   arg = PG_GETARG_INT32(0);
1806
1807     PG_RETURN_INT32(arg + 1);
1808 }
1809
1810 /* by reference, fixed length */
1811
1812 PG_FUNCTION_INFO_V1(add_one_float8);
1813
1814 Datum
1815 add_one_float8(PG_FUNCTION_ARGS)
1816 {
1817     /* The macros for FLOAT8 hide its pass-by-reference nature. */
1818     float8   arg = PG_GETARG_FLOAT8(0);
1819
1820     PG_RETURN_FLOAT8(arg + 1.0);
1821 }
1822
1823 PG_FUNCTION_INFO_V1(makepoint);
1824
1825 Datum
1826 makepoint(PG_FUNCTION_ARGS)
1827 {
1828     /* Here, the pass-by-reference nature of Point is not hidden. */
1829     Point     *pointx = PG_GETARG_POINT_P(0);
1830     Point     *pointy = PG_GETARG_POINT_P(1);
1831     Point     *new_point = (Point *) palloc(sizeof(Point));
1832
1833     new_point-&gt;x = pointx-&gt;x;
1834     new_point-&gt;y = pointy-&gt;y;
1835
1836     PG_RETURN_POINT_P(new_point);
1837 }
1838
1839 /* by reference, variable length */
1840
1841 PG_FUNCTION_INFO_V1(copytext);
1842
1843 Datum
1844 copytext(PG_FUNCTION_ARGS)
1845 {
1846     text     *t = PG_GETARG_TEXT_P(0);
1847     /*
1848      * VARSIZE is the total size of the struct in bytes.
1849      */
1850     text     *new_t = (text *) palloc(VARSIZE(t));
1851     SET_VARSIZE(new_t, VARSIZE(t));
1852     /*
1853      * VARDATA is a pointer to the data region of the struct.
1854      */
1855     memcpy((void *) VARDATA(new_t), /* destination */
1856            (void *) VARDATA(t),     /* source */
1857            VARSIZE(t) - VARHDRSZ);  /* how many bytes */
1858     PG_RETURN_TEXT_P(new_t);
1859 }
1860
1861 PG_FUNCTION_INFO_V1(concat_text);
1862
1863 Datum
1864 concat_text(PG_FUNCTION_ARGS)
1865 {
1866     text  *arg1 = PG_GETARG_TEXT_P(0);
1867     text  *arg2 = PG_GETARG_TEXT_P(1);
1868     int32 new_text_size = VARSIZE(arg1) + VARSIZE(arg2) - VARHDRSZ;
1869     text *new_text = (text *) palloc(new_text_size);
1870
1871     SET_VARSIZE(new_text, new_text_size);
1872     memcpy(VARDATA(new_text), VARDATA(arg1), VARSIZE(arg1) - VARHDRSZ);
1873     memcpy(VARDATA(new_text) + (VARSIZE(arg1) - VARHDRSZ),
1874            VARDATA(arg2), VARSIZE(arg2) - VARHDRSZ);
1875     PG_RETURN_TEXT_P(new_text);
1876 }
1877 </programlisting>
1878     </para>
1879
1880     <para>
1881      The <command>CREATE FUNCTION</command> commands are the same as
1882      for the version-0 equivalents.
1883     </para>
1884
1885     <para>
1886      At first glance, the version-1 coding conventions might appear to
1887      be just pointless obscurantism.  They do, however, offer a number
1888      of improvements, because the macros can hide unnecessary detail.
1889      An example is that in coding <function>add_one_float8</>, we no longer need to
1890      be aware that <type>float8</type> is a pass-by-reference type.  Another
1891      example is that the <literal>GETARG</> macros for variable-length types allow
1892      for more efficient fetching of <quote>toasted</quote> (compressed or
1893      out-of-line) values.
1894     </para>
1895
1896     <para>
1897      One big improvement in version-1 functions is better handling of null
1898      inputs and results.  The macro <function>PG_ARGISNULL(<replaceable>n</>)</function>
1899      allows a function to test whether each input is null.  (Of course, doing
1900      this is only necessary in functions not declared <quote>strict</>.)
1901      As with the
1902      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function> macros,
1903      the input arguments are counted beginning at zero.  Note that one
1904      should refrain from executing
1905      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function> until
1906      one has verified that the argument isn't null.
1907      To return a null result, execute <function>PG_RETURN_NULL()</function>;
1908      this works in both strict and nonstrict functions.
1909     </para>
1910
1911     <para>
1912      Other options provided in the new-style interface are two
1913      variants of the
1914      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function>
1915      macros. The first of these,
1916      <function>PG_GETARG_<replaceable>xxx</replaceable>_COPY()</function>,
1917      guarantees to return a copy of the specified argument that is
1918      safe for writing into. (The normal macros will sometimes return a
1919      pointer to a value that is physically stored in a table, which
1920      must not be written to. Using the
1921      <function>PG_GETARG_<replaceable>xxx</replaceable>_COPY()</function>
1922      macros guarantees a writable result.)
1923     The second variant consists of the
1924     <function>PG_GETARG_<replaceable>xxx</replaceable>_SLICE()</function>
1925     macros which take three arguments. The first is the number of the
1926     function argument (as above). The second and third are the offset and
1927     length of the segment to be returned. Offsets are counted from
1928     zero, and a negative length requests that the remainder of the
1929     value be returned. These macros provide more efficient access to
1930     parts of large values in the case where they have storage type
1931     <quote>external</quote>. (The storage type of a column can be specified using
1932     <literal>ALTER TABLE <replaceable>tablename</replaceable> ALTER
1933     COLUMN <replaceable>colname</replaceable> SET STORAGE
1934     <replaceable>storagetype</replaceable></literal>. <replaceable>storagetype</replaceable> is one of
1935     <literal>plain</>, <literal>external</>, <literal>extended</literal>,
1936      or <literal>main</>.)
1937     </para>
1938
1939     <para>
1940      Finally, the version-1 function call conventions make it possible
1941      to return set results (<xref linkend="xfunc-c-return-set">) and
1942      implement trigger functions (<xref linkend="triggers">) and
1943      procedural-language call handlers (<xref
1944      linkend="plhandler">).  Version-1 code is also more
1945      portable than version-0, because it does not break restrictions
1946      on function call protocol in the C standard.  For more details
1947      see <filename>src/backend/utils/fmgr/README</filename> in the
1948      source distribution.
1949     </para>
1950    </sect2>
1951
1952    <sect2>
1953     <title>Writing Code</title>
1954
1955     <para>
1956      Before we turn to the more advanced topics, we should discuss
1957      some coding rules for <productname>PostgreSQL</productname>
1958      C-language functions.  While it might be possible to load functions
1959      written in languages other than C into
1960      <productname>PostgreSQL</productname>, this is usually difficult
1961      (when it is possible at all) because other languages, such as
1962      C++, FORTRAN, or Pascal often do not follow the same calling
1963      convention as C.  That is, other languages do not pass argument
1964      and return values between functions in the same way.  For this
1965      reason, we will assume that your C-language functions are
1966      actually written in C.
1967     </para>
1968
1969     <para>
1970      The basic rules for writing and building C functions are as follows:
1971
1972      <itemizedlist>
1973       <listitem>
1974        <para>
1975         Use <literal>pg_config
1976         --includedir-server</literal><indexterm><primary>pg_config</><secondary>with user-defined C functions</></>
1977         to find out where the <productname>PostgreSQL</> server header
1978         files are installed on your system (or the system that your
1979         users will be running on).
1980        </para>
1981       </listitem>
1982
1983       <listitem>
1984        <para>
1985         Compiling and linking your code so that it can be dynamically
1986         loaded into <productname>PostgreSQL</productname> always
1987         requires special flags.  See <xref linkend="dfunc"> for a
1988         detailed explanation of how to do it for your particular
1989         operating system.
1990        </para>
1991       </listitem>
1992
1993       <listitem>
1994        <para>
1995         Remember to define a <quote>magic block</> for your shared library,
1996         as described in <xref linkend="xfunc-c-dynload">.
1997        </para>
1998       </listitem>
1999
2000       <listitem>
2001        <para>
2002         When allocating memory, use the
2003         <productname>PostgreSQL</productname> functions
2004         <function>palloc</function><indexterm><primary>palloc</></> and <function>pfree</function><indexterm><primary>pfree</></>
2005         instead of the corresponding C library functions
2006         <function>malloc</function> and <function>free</function>.
2007         The memory allocated by <function>palloc</function> will be
2008         freed automatically at the end of each transaction, preventing
2009         memory leaks.
2010        </para>
2011       </listitem>
2012
2013       <listitem>
2014        <para>
2015         Always zero the bytes of your structures using
2016         <function>memset</function>.  Without this, it's difficult to
2017         support hash indexes or hash joins, as you must pick out only
2018         the significant bits of your data structure to compute a hash.
2019         Even if you initialize all fields of your structure, there might be
2020         alignment padding (holes in the structure) that contain
2021         garbage values.
2022        </para>
2023       </listitem>
2024
2025       <listitem>
2026        <para>
2027         Most of the internal <productname>PostgreSQL</productname>
2028         types are declared in <filename>postgres.h</filename>, while
2029         the function manager interfaces
2030         (<symbol>PG_FUNCTION_ARGS</symbol>, etc.)  are in
2031         <filename>fmgr.h</filename>, so you will need to include at
2032         least these two files.  For portability reasons it's best to
2033         include <filename>postgres.h</filename> <emphasis>first</>,
2034         before any other system or user header files.  Including
2035         <filename>postgres.h</filename> will also include
2036         <filename>elog.h</filename> and <filename>palloc.h</filename>
2037         for you.
2038        </para>
2039       </listitem>
2040
2041       <listitem>
2042        <para>
2043         Symbol names defined within object files must not conflict
2044         with each other or with symbols defined in the
2045         <productname>PostgreSQL</productname> server executable.  You
2046         will have to rename your functions or variables if you get
2047         error messages to this effect.
2048        </para>
2049       </listitem>
2050      </itemizedlist>
2051     </para>
2052    </sect2>
2053
2054 &dfunc;
2055
2056    <sect2 id="xfunc-c-pgxs">
2057     <title>Extension Building Infrastructure</title>
2058
2059    <indexterm zone="xfunc-c-pgxs">
2060     <primary>pgxs</primary>
2061    </indexterm>
2062
2063    <para>
2064     If you are thinking about distributing your
2065     <productname>PostgreSQL</> extension modules, setting up a
2066     portable build system for them can be fairly difficult.  Therefore
2067     the <productname>PostgreSQL</> installation provides a build
2068     infrastructure for extensions, called <acronym>PGXS</acronym>, so
2069     that simple extension modules can be built simply against an
2070     already installed server.  Note that this infrastructure is not
2071     intended to be a universal build system framework that can be used
2072     to build all software interfacing to <productname>PostgreSQL</>;
2073     it simply automates common build rules for simple server extension
2074     modules.  For more complicated packages, you need to write your
2075     own build system.
2076    </para>
2077
2078    <para>
2079     To use the infrastructure for your extension, you must write a
2080     simple makefile.  In that makefile, you need to set some variables
2081     and finally include the global <acronym>PGXS</acronym> makefile.
2082     Here is an example that builds an extension module named
2083     <literal>isbn_issn</literal> consisting of a shared library, an
2084     SQL script, and a documentation text file:
2085 <programlisting>
2086 MODULES = isbn_issn
2087 DATA_built = isbn_issn.sql
2088 DOCS = README.isbn_issn
2089
2090 PG_CONFIG = pg_config
2091 PGXS := $(shell $(PG_CONFIG) --pgxs)
2092 include $(PGXS)
2093 </programlisting>
2094     The last three lines should always be the same.  Earlier in the
2095     file, you assign variables or add custom
2096     <application>make</application> rules.
2097    </para>
2098
2099    <para>
2100     The following variables can be set:
2101
2102     <variablelist>
2103      <varlistentry>
2104       <term><varname>MODULES</varname></term>
2105       <listitem>
2106        <para>
2107         list of shared objects to be built from source file with same
2108         stem (do not include suffix in this list)
2109        </para>
2110       </listitem>
2111      </varlistentry>
2112
2113      <varlistentry>
2114       <term><varname>DATA</varname></term>
2115       <listitem>
2116        <para>
2117         random files to install into <literal><replaceable>prefix</replaceable>/share/contrib</literal>
2118        </para>
2119       </listitem>
2120      </varlistentry>
2121
2122      <varlistentry>
2123       <term><varname>DATA_built</varname></term>
2124       <listitem>
2125        <para>
2126         random files to install into
2127         <literal><replaceable>prefix</replaceable>/share/contrib</literal>,
2128         which need to be built first
2129        </para>
2130       </listitem>
2131      </varlistentry>
2132
2133      <varlistentry>
2134       <term><varname>DOCS</varname></term>
2135       <listitem>
2136        <para>
2137         random files to install under
2138         <literal><replaceable>prefix</replaceable>/doc/contrib</literal>
2139        </para>
2140       </listitem>
2141      </varlistentry>
2142
2143      <varlistentry>
2144       <term><varname>SCRIPTS</varname></term>
2145       <listitem>
2146        <para>
2147         script files (not binaries) to install into
2148         <literal><replaceable>prefix</replaceable>/bin</literal>
2149        </para>
2150       </listitem>
2151      </varlistentry>
2152
2153      <varlistentry>
2154       <term><varname>SCRIPTS_built</varname></term>
2155       <listitem>
2156        <para>
2157         script files (not binaries) to install into
2158         <literal><replaceable>prefix</replaceable>/bin</literal>,
2159         which need to be built first
2160        </para>
2161       </listitem>
2162      </varlistentry>
2163
2164      <varlistentry>
2165       <term><varname>REGRESS</varname></term>
2166       <listitem>
2167        <para>
2168         list of regression test cases (without suffix), see below
2169        </para>
2170       </listitem>
2171      </varlistentry>
2172     </variablelist>
2173
2174     or at most one of these two:
2175
2176     <variablelist>
2177      <varlistentry>
2178       <term><varname>PROGRAM</varname></term>
2179       <listitem>
2180        <para>
2181         a binary program to build (list objects files in <varname>OBJS</varname>)
2182        </para>
2183       </listitem>
2184      </varlistentry>
2185
2186      <varlistentry>
2187       <term><varname>MODULE_big</varname></term>
2188       <listitem>
2189        <para>
2190         a shared object to build (list object files in <varname>OBJS</varname>)
2191        </para>
2192       </listitem>
2193      </varlistentry>
2194     </variablelist>
2195
2196     The following can also be set:
2197
2198     <variablelist>
2199
2200      <varlistentry>
2201       <term><varname>EXTRA_CLEAN</varname></term>
2202       <listitem>
2203        <para>
2204         extra files to remove in <literal>make clean</literal>
2205        </para>
2206       </listitem>
2207      </varlistentry>
2208
2209      <varlistentry>
2210       <term><varname>PG_CPPFLAGS</varname></term>
2211       <listitem>
2212        <para>
2213         will be added to <varname>CPPFLAGS</varname>
2214        </para>
2215       </listitem>
2216      </varlistentry>
2217
2218      <varlistentry>
2219       <term><varname>PG_LIBS</varname></term>
2220       <listitem>
2221        <para>
2222         will be added to <varname>PROGRAM</varname> link line
2223        </para>
2224       </listitem>
2225      </varlistentry>
2226
2227      <varlistentry>
2228       <term><varname>SHLIB_LINK</varname></term>
2229       <listitem>
2230        <para>
2231         will be added to <varname>MODULE_big</varname> link line
2232        </para>
2233       </listitem>
2234      </varlistentry>
2235
2236      <varlistentry>
2237       <term><varname>PG_CONFIG</varname></term>
2238       <listitem>
2239        <para>
2240         path to <application>pg_config</> program for the
2241         <productname>PostgreSQL</productname> installation to build against
2242         (typically just <literal>pg_config</> to use the first one in your
2243         <varname>PATH</>)
2244        </para>
2245       </listitem>
2246      </varlistentry>
2247     </variablelist>
2248    </para>
2249
2250    <para>
2251     Put this makefile as <literal>Makefile</literal> in the directory
2252     which holds your extension. Then you can do
2253     <literal>make</literal> to compile, and later <literal>make
2254     install</literal> to install your module.  By default, the extension is
2255     compiled and installed for the
2256     <productname>PostgreSQL</productname> installation that
2257     corresponds to the first <command>pg_config</command> program
2258     found in your path.  You can use a different installation by
2259     setting <varname>PG_CONFIG</varname> to point to its
2260     <command>pg_config</command> program, either within the makefile
2261     or on the <literal>make</literal> command line.
2262    </para>
2263
2264    <caution>
2265     <para>
2266      Changing <varname>PG_CONFIG</varname> only works when building
2267      against <productname>PostgreSQL</productname> 8.3 or later.
2268      With older releases it does not work to set it to anything except
2269      <literal>pg_config</>; you must alter your <varname>PATH</>
2270      to select the installation to build against.
2271     </para>
2272    </caution>
2273
2274    <para>
2275     The scripts listed in the <varname>REGRESS</> variable are used for
2276     regression testing of your module, just like <literal>make
2277     installcheck</literal> is used for the main
2278     <productname>PostgreSQL</productname> server.  For this to work you need
2279     to have a subdirectory named <literal>sql/</literal> in your extension's
2280     directory, within which you put one file for each group of tests you want
2281     to run.  The files should have extension <literal>.sql</literal>, which
2282     should not be included in the <varname>REGRESS</varname> list in the
2283     makefile.  For each test there should be a file containing the expected
2284     result in a subdirectory named <literal>expected/</literal>, with extension
2285     <literal>.out</literal>.  The tests are run by executing <literal>make
2286     installcheck</literal>, and the resulting output will be compared to the
2287     expected files.  The differences will be written to the file
2288     <literal>regression.diffs</literal> in <command>diff -c</command> format.
2289     Note that trying to run a test which is missing the expected file will be
2290     reported as <quote>trouble</quote>, so make sure you have all expected
2291     files.
2292    </para>
2293
2294    <tip>
2295     <para>
2296      The easiest way of creating the expected files is creating empty files,
2297      then carefully inspecting the result files after a test run (to be found
2298      in the <literal>results/</literal> directory), and copying them to
2299      <literal>expected/</literal> if they match what you want from the test.
2300     </para>
2301
2302    </tip>
2303   </sect2>
2304
2305
2306    <sect2>
2307     <title>Composite-Type Arguments</title>
2308
2309     <para>
2310      Composite types do not have a fixed layout like C structures.
2311      Instances of a composite type can contain null fields.  In
2312      addition, composite types that are part of an inheritance
2313      hierarchy can have different fields than other members of the
2314      same inheritance hierarchy.  Therefore,
2315      <productname>PostgreSQL</productname> provides a function
2316      interface for accessing fields of composite types from C.
2317     </para>
2318
2319     <para>
2320      Suppose we want to write a function to answer the query:
2321
2322 <programlisting>
2323 SELECT name, c_overpaid(emp, 1500) AS overpaid
2324     FROM emp
2325     WHERE name = 'Bill' OR name = 'Sam';
2326 </programlisting>
2327
2328      Using call conventions version 0, we can define
2329      <function>c_overpaid</> as:
2330
2331 <programlisting>
2332 #include "postgres.h"
2333 #include "executor/executor.h"  /* for GetAttributeByName() */
2334
2335 bool
2336 c_overpaid(HeapTupleHeader t, /* the current row of emp */
2337            int32 limit)
2338 {
2339     bool isnull;
2340     int32 salary;
2341
2342     salary = DatumGetInt32(GetAttributeByName(t, "salary", &amp;isnull));
2343     if (isnull)
2344         return false;
2345     return salary &gt; limit;
2346 }
2347 </programlisting>
2348
2349      In version-1 coding, the above would look like this:
2350
2351 <programlisting>
2352 #include "postgres.h"
2353 #include "executor/executor.h"  /* for GetAttributeByName() */
2354
2355 PG_FUNCTION_INFO_V1(c_overpaid);
2356
2357 Datum
2358 c_overpaid(PG_FUNCTION_ARGS)
2359 {
2360     HeapTupleHeader  t = PG_GETARG_HEAPTUPLEHEADER(0);
2361     int32            limit = PG_GETARG_INT32(1);
2362     bool isnull;
2363     Datum salary;
2364
2365     salary = GetAttributeByName(t, "salary", &amp;isnull);
2366     if (isnull)
2367         PG_RETURN_BOOL(false);
2368     /* Alternatively, we might prefer to do PG_RETURN_NULL() for null salary. */
2369
2370     PG_RETURN_BOOL(DatumGetInt32(salary) &gt; limit);
2371 }
2372 </programlisting>
2373     </para>
2374
2375     <para>
2376      <function>GetAttributeByName</function> is the
2377      <productname>PostgreSQL</productname> system function that
2378      returns attributes out of the specified row.  It has
2379      three arguments: the argument of type <type>HeapTupleHeader</type> passed
2380      into
2381      the  function, the name of the desired attribute, and a
2382      return parameter that tells whether  the  attribute
2383      is  null.   <function>GetAttributeByName</function> returns a <type>Datum</type>
2384      value that you can convert to the proper data type by using the
2385      appropriate <function>DatumGet<replaceable>XXX</replaceable>()</function>
2386      macro.  Note that the return value is meaningless if the null flag is
2387      set; always check the null flag before trying to do anything with the
2388      result.
2389     </para>
2390
2391     <para>
2392      There is also <function>GetAttributeByNum</function>, which selects
2393      the target attribute by column number instead of name.
2394     </para>
2395
2396     <para>
2397      The following command declares the function
2398      <function>c_overpaid</function> in SQL:
2399
2400 <programlisting>
2401 CREATE FUNCTION c_overpaid(emp, integer) RETURNS boolean
2402     AS '<replaceable>DIRECTORY</replaceable>/funcs', 'c_overpaid'
2403     LANGUAGE C STRICT;
2404 </programlisting>
2405
2406      Notice we have used <literal>STRICT</> so that we did not have to
2407      check whether the input arguments were NULL.
2408     </para>
2409    </sect2>
2410
2411    <sect2>
2412     <title>Returning Rows (Composite Types)</title>
2413
2414     <para>
2415      To return a row or composite-type value from a C-language
2416      function, you can use a special API that provides macros and
2417      functions to hide most of the complexity of building composite
2418      data types.  To use this API, the source file must include:
2419 <programlisting>
2420 #include "funcapi.h"
2421 </programlisting>
2422     </para>
2423
2424     <para>
2425      There are two ways you can build a composite data value (henceforth
2426      a <quote>tuple</>): you can build it from an array of Datum values,
2427      or from an array of C strings that can be passed to the input
2428      conversion functions of the tuple's column data types.  In either
2429      case, you first need to obtain or construct a <structname>TupleDesc</>
2430      descriptor for the tuple structure.  When working with Datums, you
2431      pass the <structname>TupleDesc</> to <function>BlessTupleDesc</>,
2432      and then call <function>heap_form_tuple</> for each row.  When working
2433      with C strings, you pass the <structname>TupleDesc</> to
2434      <function>TupleDescGetAttInMetadata</>, and then call
2435      <function>BuildTupleFromCStrings</> for each row.  In the case of a
2436      function returning a set of tuples, the setup steps can all be done
2437      once during the first call of the function.
2438     </para>
2439
2440     <para>
2441      Several helper functions are available for setting up the needed
2442      <structname>TupleDesc</>.  The recommended way to do this in most
2443      functions returning composite values is to call:
2444 <programlisting>
2445 TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo,
2446                                    Oid *resultTypeId,
2447                                    TupleDesc *resultTupleDesc)
2448 </programlisting>
2449      passing the same <literal>fcinfo</> struct passed to the calling function
2450      itself.  (This of course requires that you use the version-1
2451      calling conventions.)  <varname>resultTypeId</> can be specified
2452      as <literal>NULL</> or as the address of a local variable to receive the
2453      function's result type OID.  <varname>resultTupleDesc</> should be the
2454      address of a local <structname>TupleDesc</> variable.  Check that the
2455      result is <literal>TYPEFUNC_COMPOSITE</>; if so,
2456      <varname>resultTupleDesc</> has been filled with the needed
2457      <structname>TupleDesc</>.  (If it is not, you can report an error along
2458      the lines of <quote>function returning record called in context that
2459      cannot accept type record</quote>.)
2460     </para>
2461
2462     <tip>
2463      <para>
2464       <function>get_call_result_type</> can resolve the actual type of a
2465       polymorphic function result; so it is useful in functions that return
2466       scalar polymorphic results, not only functions that return composites.
2467       The <varname>resultTypeId</> output is primarily useful for functions
2468       returning polymorphic scalars.
2469      </para>
2470     </tip>
2471
2472     <note>
2473      <para>
2474       <function>get_call_result_type</> has a sibling
2475       <function>get_expr_result_type</>, which can be used to resolve the
2476       expected output type for a function call represented by an expression
2477       tree.  This can be used when trying to determine the result type from
2478       outside the function itself.  There is also
2479       <function>get_func_result_type</>, which can be used when only the
2480       function's OID is available.  However these functions are not able
2481       to deal with functions declared to return <structname>record</>, and
2482       <function>get_func_result_type</> cannot resolve polymorphic types,
2483       so you should preferentially use <function>get_call_result_type</>.
2484      </para>
2485     </note>
2486
2487     <para>
2488      Older, now-deprecated functions for obtaining
2489      <structname>TupleDesc</>s are:
2490 <programlisting>
2491 TupleDesc RelationNameGetTupleDesc(const char *relname)
2492 </programlisting>
2493      to get a <structname>TupleDesc</> for the row type of a named relation,
2494      and:
2495 <programlisting>
2496 TupleDesc TypeGetTupleDesc(Oid typeoid, List *colaliases)
2497 </programlisting>
2498      to get a <structname>TupleDesc</> based on a type OID. This can
2499      be used to get a <structname>TupleDesc</> for a base or
2500      composite type.  It will not work for a function that returns
2501      <structname>record</>, however, and it cannot resolve polymorphic
2502      types.
2503     </para>
2504
2505     <para>
2506      Once you have a <structname>TupleDesc</>, call:
2507 <programlisting>
2508 TupleDesc BlessTupleDesc(TupleDesc tupdesc)
2509 </programlisting>
2510      if you plan to work with Datums, or:
2511 <programlisting>
2512 AttInMetadata *TupleDescGetAttInMetadata(TupleDesc tupdesc)
2513 </programlisting>
2514      if you plan to work with C strings.  If you are writing a function
2515      returning set, you can save the results of these functions in the
2516      <structname>FuncCallContext</> structure &mdash; use the
2517      <structfield>tuple_desc</> or <structfield>attinmeta</> field
2518      respectively.
2519     </para>
2520
2521     <para>
2522      When working with Datums, use:
2523 <programlisting>
2524 HeapTuple heap_form_tuple(TupleDesc tupdesc, Datum *values, bool *isnull)
2525 </programlisting>
2526      to build a <structname>HeapTuple</> given user data in Datum form.
2527     </para>
2528
2529     <para>
2530      When working with C strings, use:
2531 <programlisting>
2532 HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
2533 </programlisting>
2534      to build a <structname>HeapTuple</> given user data
2535      in C string form.  <literal>values</literal> is an array of C strings,
2536      one for each attribute of the return row. Each C string should be in
2537      the form expected by the input function of the attribute data
2538      type. In order to return a null value for one of the attributes,
2539      the corresponding pointer in the <parameter>values</> array
2540      should be set to <symbol>NULL</>.  This function will need to
2541      be called again for each row you return.
2542     </para>
2543
2544     <para>
2545      Once you have built a tuple to return from your function, it
2546      must be converted into a <type>Datum</>. Use:
2547 <programlisting>
2548 HeapTupleGetDatum(HeapTuple tuple)
2549 </programlisting>
2550      to convert a <structname>HeapTuple</> into a valid Datum.  This
2551      <type>Datum</> can be returned directly if you intend to return
2552      just a single row, or it can be used as the current return value
2553      in a set-returning function.
2554     </para>
2555
2556     <para>
2557      An example appears in the next section.
2558     </para>
2559
2560    </sect2>
2561
2562    <sect2 id="xfunc-c-return-set">
2563     <title>Returning Sets</title>
2564
2565     <para>
2566      There is also a special API that provides support for returning
2567      sets (multiple rows) from a C-language function.  A set-returning
2568      function must follow the version-1 calling conventions.  Also,
2569      source files must include <filename>funcapi.h</filename>, as
2570      above.
2571     </para>
2572
2573     <para>
2574      A set-returning function (<acronym>SRF</>) is called
2575      once for each item it returns.  The <acronym>SRF</> must
2576      therefore save enough state to remember what it was doing and
2577      return the next item on each call.
2578      The structure <structname>FuncCallContext</> is provided to help
2579      control this process.  Within a function, <literal>fcinfo-&gt;flinfo-&gt;fn_extra</>
2580      is used to hold a pointer to <structname>FuncCallContext</>
2581      across calls.
2582 <programlisting>
2583 typedef struct
2584 {
2585     /*
2586      * Number of times we've been called before
2587      *
2588      * call_cntr is initialized to 0 for you by SRF_FIRSTCALL_INIT(), and
2589      * incremented for you every time SRF_RETURN_NEXT() is called.
2590      */
2591     uint32 call_cntr;
2592
2593     /*
2594      * OPTIONAL maximum number of calls
2595      *
2596      * max_calls is here for convenience only and setting it is optional.
2597      * If not set, you must provide alternative means to know when the
2598      * function is done.
2599      */
2600     uint32 max_calls;
2601
2602     /*
2603      * OPTIONAL pointer to result slot
2604      *
2605      * This is obsolete and only present for backwards compatibility, viz,
2606      * user-defined SRFs that use the deprecated TupleDescGetSlot().
2607      */
2608     TupleTableSlot *slot;
2609
2610     /*
2611      * OPTIONAL pointer to miscellaneous user-provided context information
2612      *
2613      * user_fctx is for use as a pointer to your own data to retain
2614      * arbitrary context information between calls of your function.
2615      */
2616     void *user_fctx;
2617
2618     /*
2619      * OPTIONAL pointer to struct containing attribute type input metadata
2620      *
2621      * attinmeta is for use when returning tuples (i.e., composite data types)
2622      * and is not used when returning base data types. It is only needed
2623      * if you intend to use BuildTupleFromCStrings() to create the return
2624      * tuple.
2625      */
2626     AttInMetadata *attinmeta;
2627
2628     /*
2629      * memory context used for structures that must live for multiple calls
2630      *
2631      * multi_call_memory_ctx is set by SRF_FIRSTCALL_INIT() for you, and used
2632      * by SRF_RETURN_DONE() for cleanup. It is the most appropriate memory
2633      * context for any memory that is to be reused across multiple calls
2634      * of the SRF.
2635      */
2636     MemoryContext multi_call_memory_ctx;
2637
2638     /*
2639      * OPTIONAL pointer to struct containing tuple description
2640      *
2641      * tuple_desc is for use when returning tuples (i.e. composite data types)
2642      * and is only needed if you are going to build the tuples with
2643      * heap_form_tuple() rather than with BuildTupleFromCStrings().  Note that
2644      * the TupleDesc pointer stored here should usually have been run through
2645      * BlessTupleDesc() first.
2646      */
2647     TupleDesc tuple_desc;
2648
2649 } FuncCallContext;
2650 </programlisting>
2651     </para>
2652
2653     <para>
2654      An <acronym>SRF</> uses several functions and macros that
2655      automatically manipulate the <structname>FuncCallContext</>
2656      structure (and expect to find it via <literal>fn_extra</>).  Use:
2657 <programlisting>
2658 SRF_IS_FIRSTCALL()
2659 </programlisting>
2660      to determine if your function is being called for the first or a
2661      subsequent time. On the first call (only) use:
2662 <programlisting>
2663 SRF_FIRSTCALL_INIT()
2664 </programlisting>
2665      to initialize the <structname>FuncCallContext</>. On every function call,
2666      including the first, use:
2667 <programlisting>
2668 SRF_PERCALL_SETUP()
2669 </programlisting>
2670      to properly set up for using the <structname>FuncCallContext</>
2671      and clearing any previously returned data left over from the
2672      previous pass.
2673     </para>
2674
2675     <para>
2676      If your function has data to return, use:
2677 <programlisting>
2678 SRF_RETURN_NEXT(funcctx, result)
2679 </programlisting>
2680      to return it to the caller.  (<literal>result</> must be of type
2681      <type>Datum</>, either a single value or a tuple prepared as
2682      described above.)  Finally, when your function is finished
2683      returning data, use:
2684 <programlisting>
2685 SRF_RETURN_DONE(funcctx)
2686 </programlisting>
2687      to clean up and end the <acronym>SRF</>.
2688     </para>
2689
2690     <para>
2691      The memory context that is current when the <acronym>SRF</> is called is
2692      a transient context that will be cleared between calls.  This means
2693      that you do not need to call <function>pfree</> on everything
2694      you allocated using <function>palloc</>; it will go away anyway.  However, if you want to allocate
2695      any data structures to live across calls, you need to put them somewhere
2696      else.  The memory context referenced by
2697      <structfield>multi_call_memory_ctx</> is a suitable location for any
2698      data that needs to survive until the <acronym>SRF</> is finished running.  In most
2699      cases, this means that you should switch into
2700      <structfield>multi_call_memory_ctx</> while doing the first-call setup.
2701     </para>
2702
2703     <para>
2704      A complete pseudo-code example looks like the following:
2705 <programlisting>
2706 Datum
2707 my_set_returning_function(PG_FUNCTION_ARGS)
2708 {
2709     FuncCallContext  *funcctx;
2710     Datum             result;
2711     MemoryContext     oldcontext;
2712     <replaceable>further declarations as needed</replaceable>
2713
2714     if (SRF_IS_FIRSTCALL())
2715     {
2716         funcctx = SRF_FIRSTCALL_INIT();
2717         oldcontext = MemoryContextSwitchTo(funcctx-&gt;multi_call_memory_ctx);
2718         /* One-time setup code appears here: */
2719         <replaceable>user code</replaceable>
2720         <replaceable>if returning composite</replaceable>
2721             <replaceable>build TupleDesc, and perhaps AttInMetadata</replaceable>
2722         <replaceable>endif returning composite</replaceable>
2723         <replaceable>user code</replaceable>
2724         MemoryContextSwitchTo(oldcontext);
2725     }
2726
2727     /* Each-time setup code appears here: */
2728     <replaceable>user code</replaceable>
2729     funcctx = SRF_PERCALL_SETUP();
2730     <replaceable>user code</replaceable>
2731
2732     /* this is just one way we might test whether we are done: */
2733     if (funcctx-&gt;call_cntr &lt; funcctx-&gt;max_calls)
2734     {
2735         /* Here we want to return another item: */
2736         <replaceable>user code</replaceable>
2737         <replaceable>obtain result Datum</replaceable>
2738         SRF_RETURN_NEXT(funcctx, result);
2739     }
2740     else
2741     {
2742         /* Here we are done returning items and just need to clean up: */
2743         <replaceable>user code</replaceable>
2744         SRF_RETURN_DONE(funcctx);
2745     }
2746 }
2747 </programlisting>
2748     </para>
2749
2750     <para>
2751      A complete example of a simple <acronym>SRF</> returning a composite type
2752      looks like:
2753 <programlisting>
2754 PG_FUNCTION_INFO_V1(retcomposite);
2755
2756 Datum
2757 retcomposite(PG_FUNCTION_ARGS)
2758 {
2759     FuncCallContext     *funcctx;
2760     int                  call_cntr;
2761     int                  max_calls;
2762     TupleDesc            tupdesc;
2763     AttInMetadata       *attinmeta;
2764
2765      /* stuff done only on the first call of the function */
2766      if (SRF_IS_FIRSTCALL())
2767      {
2768         MemoryContext   oldcontext;
2769
2770         /* create a function context for cross-call persistence */
2771         funcctx = SRF_FIRSTCALL_INIT();
2772
2773         /* switch to memory context appropriate for multiple function calls */
2774         oldcontext = MemoryContextSwitchTo(funcctx-&gt;multi_call_memory_ctx);
2775
2776         /* total number of tuples to be returned */
2777         funcctx-&gt;max_calls = PG_GETARG_UINT32(0);
2778
2779         /* Build a tuple descriptor for our result type */
2780         if (get_call_result_type(fcinfo, NULL, &amp;tupdesc) != TYPEFUNC_COMPOSITE)
2781             ereport(ERROR,
2782                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2783                      errmsg("function returning record called in context "
2784                             "that cannot accept type record")));
2785
2786         /*
2787          * generate attribute metadata needed later to produce tuples from raw
2788          * C strings
2789          */
2790         attinmeta = TupleDescGetAttInMetadata(tupdesc);
2791         funcctx-&gt;attinmeta = attinmeta;
2792
2793         MemoryContextSwitchTo(oldcontext);
2794     }
2795
2796     /* stuff done on every call of the function */
2797     funcctx = SRF_PERCALL_SETUP();
2798
2799     call_cntr = funcctx-&gt;call_cntr;
2800     max_calls = funcctx-&gt;max_calls;
2801     attinmeta = funcctx-&gt;attinmeta;
2802
2803     if (call_cntr &lt; max_calls)    /* do when there is more left to send */
2804     {
2805         char       **values;
2806         HeapTuple    tuple;
2807         Datum        result;
2808
2809         /*
2810          * Prepare a values array for building the returned tuple.
2811          * This should be an array of C strings which will
2812          * be processed later by the type input functions.
2813          */
2814         values = (char **) palloc(3 * sizeof(char *));
2815         values[0] = (char *) palloc(16 * sizeof(char));
2816         values[1] = (char *) palloc(16 * sizeof(char));
2817         values[2] = (char *) palloc(16 * sizeof(char));
2818
2819         snprintf(values[0], 16, "%d", 1 * PG_GETARG_INT32(1));
2820         snprintf(values[1], 16, "%d", 2 * PG_GETARG_INT32(1));
2821         snprintf(values[2], 16, "%d", 3 * PG_GETARG_INT32(1));
2822
2823         /* build a tuple */
2824         tuple = BuildTupleFromCStrings(attinmeta, values);
2825
2826         /* make the tuple into a datum */
2827         result = HeapTupleGetDatum(tuple);
2828
2829         /* clean up (this is not really necessary) */
2830         pfree(values[0]);
2831         pfree(values[1]);
2832         pfree(values[2]);
2833         pfree(values);
2834
2835         SRF_RETURN_NEXT(funcctx, result);
2836     }
2837     else    /* do when there is no more left */
2838     {
2839         SRF_RETURN_DONE(funcctx);
2840     }
2841 }
2842 </programlisting>
2843
2844      One way to declare this function in SQL is:
2845 <programlisting>
2846 CREATE TYPE __retcomposite AS (f1 integer, f2 integer, f3 integer);
2847
2848 CREATE OR REPLACE FUNCTION retcomposite(integer, integer)
2849     RETURNS SETOF __retcomposite
2850     AS '<replaceable>filename</>', 'retcomposite'
2851     LANGUAGE C IMMUTABLE STRICT;
2852 </programlisting>
2853      A different way is to use OUT parameters:
2854 <programlisting>
2855 CREATE OR REPLACE FUNCTION retcomposite(IN integer, IN integer,
2856     OUT f1 integer, OUT f2 integer, OUT f3 integer)
2857     RETURNS SETOF record
2858     AS '<replaceable>filename</>', 'retcomposite'
2859     LANGUAGE C IMMUTABLE STRICT;
2860 </programlisting>
2861      Notice that in this method the output type of the function is formally
2862      an anonymous <structname>record</> type.
2863     </para>
2864
2865     <para>
2866      The directory <filename>contrib/tablefunc</> in the source
2867      distribution contains more examples of set-returning functions.
2868     </para>
2869    </sect2>
2870
2871    <sect2>
2872     <title>Polymorphic Arguments and Return Types</title>
2873
2874     <para>
2875      C-language functions can be declared to accept and
2876      return the polymorphic types
2877      <type>anyelement</type>, <type>anyarray</type>, <type>anynonarray</type>,
2878      and <type>anyenum</type>.
2879      See <xref linkend="extend-types-polymorphic"> for a more detailed explanation
2880      of polymorphic functions. When function arguments or return types
2881      are defined as polymorphic types, the function author cannot know
2882      in advance what data type it will be called with, or
2883      need to return. There are two routines provided in <filename>fmgr.h</>
2884      to allow a version-1 C function to discover the actual data types
2885      of its arguments and the type it is expected to return. The routines are
2886      called <literal>get_fn_expr_rettype(FmgrInfo *flinfo)</> and
2887      <literal>get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)</>.
2888      They return the result or argument type OID, or <symbol>InvalidOid</symbol> if the
2889      information is not available.
2890      The structure <literal>flinfo</> is normally accessed as
2891      <literal>fcinfo-&gt;flinfo</>. The parameter <literal>argnum</>
2892      is zero based.  <function>get_call_result_type</> can also be used
2893      as an alternative to <function>get_fn_expr_rettype</>.
2894     </para>
2895
2896     <para>
2897      For example, suppose we want to write a function to accept a single
2898      element of any type, and return a one-dimensional array of that type:
2899
2900 <programlisting>
2901 PG_FUNCTION_INFO_V1(make_array);
2902 Datum
2903 make_array(PG_FUNCTION_ARGS)
2904 {
2905     ArrayType  *result;
2906     Oid         element_type = get_fn_expr_argtype(fcinfo-&gt;flinfo, 0);
2907     Datum       element;
2908     bool        isnull;
2909     int16       typlen;
2910     bool        typbyval;
2911     char        typalign;
2912     int         ndims;
2913     int         dims[MAXDIM];
2914     int         lbs[MAXDIM];
2915
2916     if (!OidIsValid(element_type))
2917         elog(ERROR, "could not determine data type of input");
2918
2919     /* get the provided element, being careful in case it's NULL */
2920     isnull = PG_ARGISNULL(0);
2921     if (isnull)
2922         element = (Datum) 0;
2923     else
2924         element = PG_GETARG_DATUM(0);
2925
2926     /* we have one dimension */
2927     ndims = 1;
2928     /* and one element */
2929     dims[0] = 1;
2930     /* and lower bound is 1 */
2931     lbs[0] = 1;
2932
2933     /* get required info about the element type */
2934     get_typlenbyvalalign(element_type, &amp;typlen, &amp;typbyval, &amp;typalign);
2935
2936     /* now build the array */
2937     result = construct_md_array(&amp;element, &amp;isnull, ndims, dims, lbs,
2938                                 element_type, typlen, typbyval, typalign);
2939
2940     PG_RETURN_ARRAYTYPE_P(result);
2941 }
2942 </programlisting>
2943     </para>
2944
2945     <para>
2946      The following command declares the function
2947      <function>make_array</function> in SQL:
2948
2949 <programlisting>
2950 CREATE FUNCTION make_array(anyelement) RETURNS anyarray
2951     AS '<replaceable>DIRECTORY</replaceable>/funcs', 'make_array'
2952     LANGUAGE C IMMUTABLE;
2953 </programlisting>
2954     </para>
2955    </sect2>
2956    <sect2>
2957     <title>Shared Memory and LWLocks</title>
2958
2959     <para>
2960      Add-ins can reserve LWLocks and an allocation of shared memory on server
2961      startup.  The add-in's shared library must be preloaded by specifying
2962      it in
2963      <xref linkend="guc-shared-preload-libraries"><indexterm><primary>shared-preload-libraries</></>.
2964      Shared memory is reserved by calling:
2965 <programlisting>
2966 void RequestAddinShmemSpace(int size)
2967 </programlisting>
2968      from your <function>_PG_init</> function.
2969     </para>
2970     <para>
2971      LWLocks are reserved by calling:
2972 <programlisting>
2973 void RequestAddinLWLocks(int n)
2974 </programlisting>
2975      from <function>_PG_init</>.
2976     </para>
2977     <para>
2978      To avoid possible race-conditions, each backend should use the LWLock
2979      <function>AddinShmemInitLock</> when connecting to and initializing
2980      its allocation of shared memory, as shown here:
2981 <programlisting>
2982         static mystruct *ptr = NULL;
2983
2984         if (!ptr)
2985         {
2986                 bool    found;
2987
2988                 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
2989                 ptr = ShmemInitStruct("my struct name", size, &amp;found);
2990                 if (!ptr)
2991                         elog(ERROR, "out of shared memory");
2992                 if (!found)
2993                 {
2994                         initialize contents of shmem area;
2995                         acquire any requested LWLocks using:
2996                         ptr->mylockid = LWLockAssign();
2997                 }
2998                 LWLockRelease(AddinShmemInitLock);
2999         }
3000 </programlisting>
3001     </para>
3002    </sect2>
3003   </sect1>