From e6c039d13e16a3a2dec5ba479d9d1fb3229c03a3 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 28 Mar 2018 14:22:42 -0700
Subject: [PATCH] Add documentation for the JIT feature.

As promised in earlier commits, this adds documentation about the new
build options, the new GUCs, about the planner logic when JIT is used,
and the benefits of JIT in general.

Also adds a more implementation oriented README.

I'm sure we're going to want to expand this further, but I think this
is a reasonable start.

Author: Andres Freund, with contributions by Thomas Munro
Reviewed-By: Thomas Munro
Discussion: https://postgr.es/m/20170901064131.tazjxwus3k2w3ybh@alap3.anarazel.de
---
 doc/src/sgml/acronyms.sgml     |  10 ++
 doc/src/sgml/config.sgml       | 183 +++++++++++++++++++-
 doc/src/sgml/filelist.sgml     |   1 +
 doc/src/sgml/func.sgml         |   8 +
 doc/src/sgml/installation.sgml |  53 ++++++
 doc/src/sgml/jit.sgml          | 299 +++++++++++++++++++++++++++++++++
 doc/src/sgml/postgres.sgml     |   1 +
 doc/src/sgml/storage.sgml      |   2 +-
 src/backend/jit/README         | 289 +++++++++++++++++++++++++++++++
 9 files changed, 844 insertions(+), 2 deletions(-)
 create mode 100644 doc/src/sgml/jit.sgml
 create mode 100644 src/backend/jit/README
diff --git a/doc/src/sgml/acronyms.sgml b/doc/src/sgml/acronyms.sgml
index 751c46de6d..638ffc9fe8 100644
--- a/doc/src/sgml/acronyms.sgml
+++ b/doc/src/sgml/acronyms.sgml
@@ -369,6 +369,16 @@
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><acronym>JIT</acronym></term>
+    <listitem>
+     <para>
+      <ulink url="https://en.wikipedia.org/wiki/Just-in-time_compilation">Just-in-Time
+      compilation</ulink>
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><acronym>JSON</acronym></term>
     <listitem>
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4d899e3b24..dc9ed22eb4 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4136,6 +4136,62 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
+
+     <varlistentry id="guc-jit-above-cost" xreflabel="jit_above_cost">
+      <term><varname>jit_above_cost</varname> (<type>floating point</type>)
+      <indexterm>
+       <primary><varname>jit_above_cost</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the planner's cutoff above which JIT compilation is used as part
+        of query execution (see <xref linkend="jit"/>). Performing
+        <acronym>JIT</acronym> costs time but can accelerate query execution.
+
+        The default is <literal>100000</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-jit-optimize-above-cost" xreflabel="jit_optimize_above_cost">
+      <term><varname>jit_optimize_above_cost</varname> (<type>floating point</type>)
+      <indexterm>
+       <primary><varname>jit_optimize_above_cost</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the planner's cutoff above which JIT compiled programs (see <xref
+        linkend="guc-jit-above-cost"/>) are optimized. Optimization initially
+        takes time, but can improve execution speed.  It is not meaningful to
+        set this to a lower value than <xref linkend="guc-jit-above-cost"/>.
+
+        The default is <literal>500000</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-jit-inline-above-cost" xreflabel="jit_inline_above_cost">
+      <term><varname>jit_inline_above_cost</varname> (<type>floating point</type>)
+      <indexterm>
+       <primary><varname>jit_inline_above_cost</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the planner's cutoff above which JIT compiled programs (see <xref
+        linkend="guc-jit-above-cost"/>) attempt to inline functions and
+        operators. Inlining initially takes time, but can improve execution
+        speed.  It is unlikely to be beneficial to set
+        <varname>jit_inline_above_cost</varname> below
+        <varname>jit_optimize_above_cost</varname>.
+
+        The default is <literal>500000</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
 
     </sect2>
@@ -4418,6 +4474,23 @@ SELECT * FROM parent WHERE key = 2400;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-jit" xreflabel="jit">
+      <term><varname>jit</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>jit</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Determines whether <acronym>JIT</acronym> may be used by
+        <productname>PostgreSQL</productname>, if available (see <xref
+        linkend="jit"/>).
+
+        The default is <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-join-collapse-limit" xreflabel="join_collapse_limit">
       <term><varname>join_collapse_limit</varname> (<type>integer</type>)
       <indexterm>
@@ -7412,6 +7485,29 @@ SET XML OPTION { DOCUMENT | CONTENT };
       </note>
       </listitem>
      </varlistentry>
+
+     <varlistentry id="guc-jit-provider" xreflabel="jit_provider">
+      <term><varname>jit_provider</varname> (<type>string</type>)
+       <indexterm>
+        <primary><varname>jit_provider</varname> configuration parameter</primary>
+       </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Determines which JIT provider (see <xref linkend="jit-extensibility"/>) is
+        used. The built-in default is <literal>llvmjit</literal>.
+       </para>
+       <para>
+        If set to a non-existent library, <acronym>JIT</acronym> will not be
+        available, but no error will be raised. This allows JIT support to be
+        installed separately from the main
+        <productname>PostgreSQL</productname> package.
+
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
    </sect2>
 
@@ -8658,7 +8754,92 @@ LOG:  CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1)
        </para>
       </listitem>
      </varlistentry>
-   </variablelist>
+
+     <varlistentry id="guc-jit-debugging-support" xreflabel="jit_debugging_support">
+      <term><varname>jit_debugging_support</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>jit_debugging_support</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If LLVM has the required functionality, register generated functions
+        with <productname>GDB</productname>.  This makes debugging easier.
+
+        The default setting is <literal>off</literal>.  This parameter can
+        only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-jit-dump-bitcode" xreflabel="jit_dump_bitcode">
+      <term><varname>jit_dump_bitcode</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>jit_dump_bitcode</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Writes the generated <productname>LLVM</productname> IR out to the
+        filesystem, inside <xref linkend="guc-data-directory"/>. This is only
+        useful for working on the internals of the JIT implementation.
+
+        The default setting is <literal>off</literal>, and it can only be
+        changed by a superuser.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-jit-expressions" xreflabel="jit_expressions">
+      <term><varname>jit_expressions</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>jit_expressions</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Determines whether expressions are JIT compiled, subject to costing
+        decisions (see <xref linkend="jit-decision"/>).  The default is
+        <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-jit-profiling-support" xreflabel="jit_profiling_support">
+      <term><varname>jit_profiling_support</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>jit_profiling_support</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If LLVM has the required functionality, emit required data to allow
+        <productname>perf</productname> to profile functions generated by JIT.
+        This writes out files to <filename>$HOME/.debug/jit/</filename>; the
+        user is responsible for performing cleanup when desired.
+
+        The default setting is <literal>off</literal>.  This parameter can
+        only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-jit-tuple-deforming" xreflabel="jit_tuple_deforming">
+      <term><varname>jit_tuple_deforming</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>jit_tuple_deforming</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Determines whether tuple deforming is JIT compiled, subject to costing
+        decisions (see <xref linkend="jit-decision"/>). The default is
+        <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
   </sect1>
   <sect1 id="runtime-config-short">
    <title>Short Options</title>
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 732b8ab7d0..56b8da0448 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -48,6 +48,7 @@
 <!ENTITY user-manag    SYSTEM "user-manag.sgml">
 <!ENTITY wal           SYSTEM "wal.sgml">
 <!ENTITY logical-replication    SYSTEM "logical-replication.sgml">
+<!ENTITY jit    SYSTEM "jit.sgml">
 
 <!-- programmer's guide -->
 <!ENTITY bgworker   SYSTEM "bgworker.sgml">
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7b1a85fc71..9d1772f349 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -15942,6 +15942,14 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n);
        <entry>is schema another session's temporary schema?</entry>
       </row>
 
+      <row>
+       <entry><literal><function>pg_jit_available()</function></literal></entry>
+       <entry><type>boolean</type></entry>
+       <entry>is <acronym>JIT</acronym> available in this session (see <xref
+       linkend="jit"/>)? Returns <literal>false</literal> if <xref
+       linkend="guc-jit"/> is set to false.</entry>
+      </row>
+
       <row>
        <entry><literal><function>pg_listening_channels()</function></literal></entry>
        <entry><type>setof text</type></entry>
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 2d24153bdc..30921cf486 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -758,6 +758,39 @@ su - postgres
        </listitem>
       </varlistentry>
 
+      <varlistentry id="configure-with-llvm">
+       <term><option>--with-llvm</option></term>
+       <listitem>
+        <para>
+         Build with support for <productname>LLVM</productname> based
+         <acronym>JIT</acronym> compilation (see <xref linkend="jit"/>).  This
+         requires the <productname>LLVM</productname> library to be installed.
+         The minimum required version of <productname>LLVM</productname> is
+         currently 3.9.
+        </para>
+        <para>
+         <command>llvm-config</command><indexterm><primary>llvm-config</primary></indexterm>
+         will be used to find the required compilation options.
+         <command>llvm-config</command>, and then
+         <command>llvm-config-$major-$minor</command> for all supported
+         versions, will be searched for in your <envar>PATH</envar>. If that would not
+         yield the correct binary, use <envar>LLVM_CONFIG</envar> to specify a
+         path to the correct <command>llvm-config</command>. For example
+<programlisting>
+./configure ... --with-llvm LLVM_CONFIG='/path/to/llvm/bin/llvm-config'
+</programlisting>
+        </para>
+
+        <para>
+         <productname>LLVM</productname> support requires a compatible
+         <command>clang</command> compiler (specified, if necessary, using the
+         <envar>CLANG</envar> environment variable), and a working C++
+         compiler (specified, if necessary, using the <envar>CXX</envar>
+         environment variable).
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry>
        <term><option>--with-icu</option></term>
        <listitem>
@@ -1342,6 +1375,16 @@ su - postgres
        </listitem>
       </varlistentry>
 
+      <varlistentry>
+       <term><envar>CLANG</envar></term>
+       <listitem>
+        <para>
+         path to <command>clang</command> program used to process source code
+         for inlining when compiling with <literal>--with-llvm</literal>
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry>
        <term><envar>CPP</envar></term>
        <listitem>
@@ -1432,6 +1475,16 @@ su - postgres
        </listitem>
       </varlistentry>
 
+      <varlistentry>
+       <term><envar>LLVM_CONFIG</envar></term>
+       <listitem>
+        <para>
+         <command>llvm-config</command> program used to locate the
+         <productname>LLVM</productname> installation.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry>
        <term><envar>MSGFMT</envar></term>
        <listitem>
diff --git a/doc/src/sgml/jit.sgml b/doc/src/sgml/jit.sgml
new file mode 100644
index 0000000000..f59e4923e1
--- /dev/null
+++ b/doc/src/sgml/jit.sgml
@@ -0,0 +1,299 @@
+<!-- doc/src/sgml/jit.sgml -->
+
+<chapter id="jit">
+ <title>Just-in-Time Compilation (<acronym>JIT</acronym>)</title>
+
+ <indexterm zone="jit">
+  <primary><acronym>JIT</acronym></primary>
+ </indexterm>
+
+ <indexterm>
+  <primary>Just-In-Time compilation</primary>
+  <see><acronym>JIT</acronym></see>
+ </indexterm>
+
+ <para>
+  This chapter explains what just-in-time compilation is, and how it can be
+  configured in <productname>PostgreSQL</productname>.
+ </para>
+
+ <sect1 id="jit-reason">
+  <title>What is <acronym>JIT</acronym>?</title>
+
+  <para>
+   Just-in-time compilation (<acronym>JIT</acronym>) is the process of turning
+   some form of interpreted program evaluation into a native program, and
+   doing so at runtime.
+
+   For example, instead of using a facility that can evaluate arbitrary SQL
+   expressions to evaluate an SQL predicate like <literal>WHERE a.col =
+   3</literal>, it is possible to generate a function that can be natively
+   executed by the CPU that just handles that expression, yielding a speedup.
+  </para>
+
+  <para>
+   <productname>PostgreSQL</productname> has builtin support to perform
+   <acronym>JIT</acronym> compilation using <ulink
+   url="https://llvm.org/"><productname>LLVM</productname></ulink> when
+   <productname>PostgreSQL</productname> is built with
+   <literal>--with-llvm</literal> (see <xref linkend="configure-with-llvm"/>).
+  </para>
+
+  <para>
+   See <filename>src/backend/jit/README</filename> for further details.
+  </para>
+
+  <sect2 id="jit-accelerated-operations">
+   <title><acronym>JIT</acronym> Accelerated Operations</title>
+   <para>
+    Currently <productname>PostgreSQL</productname>'s <acronym>JIT</acronym>
+    implementation has support for accelerating expression evaluation and
+    tuple deforming.  Several other operations could be accelerated in the
+    future.
+   </para>
+   <para>
+    Expression evaluation is used to evaluate <literal>WHERE</literal>
+    clauses, target lists, aggregates and projections. It can be accelerated
+    by generating code specific to each case.
+   </para>
+   <para>
+    Tuple deforming is the process of transforming an on-disk tuple (see <xref
+    linkend="heaptuple"/>) into its in-memory representation. It can be
+    accelerated by creating a function specific to the table layout and the
+    number of columns to be extracted.
+   </para>
+  </sect2>
+
+  <sect2 id="jit-optimization">
+   <title>Optimization</title>
+   <para>
+    <productname>LLVM</productname> has support for optimizing generated
+    code. Some of the optimizations are cheap enough to be performed whenever
+    <acronym>JIT</acronym> is used, while others are only beneficial for
+    longer running queries.
+
+    See <ulink url="https://llvm.org/docs/Passes.html#transform-passes"/> for
+    more details about optimizations.
+   </para>
+  </sect2>
+
+  <sect2 id="jit-inlining">
+   <title>Inlining</title>
+   <para>
+    <productname>PostgreSQL</productname> is very extensible and allows new
+    datatypes, functions, operators and other database objects to be defined;
+    see <xref linkend="extend"/>. In fact the built-in ones are implemented
+    using nearly the same mechanisms.  This extensibility implies some
+    overhead, for example due to function calls (see <xref linkend="xfunc"/>).
+    To reduce that overhead <acronym>JIT</acronym> compilation can inline the
+    body for small functions into the expression using them. That allows a
+    significant percentage of the overhead to be optimized away.
+   </para>
+  </sect2>
+
+ </sect1>
+
+ <sect1 id="jit-decision">
+  <title>When to <acronym>JIT</acronym>?</title>
+
+  <para>
+   <acronym>JIT</acronym> is beneficial primarily for long-running CPU bound
+   queries. Frequently these will be analytical queries.  For short queries
+   the overhead of performing <acronym>JIT</acronym> will often be higher than
+   the time it can save.
+  </para>
+
+  <para>
+   To determine whether <acronym>JIT</acronym> is used, the total cost of a
+   query (see <xref linkend="planner-stats-details"/> and <xref
+   linkend="runtime-config-query-constants"/>) is used.
+  </para>
+
+  <para>
+   The cost of the query will be compared with the <xref
+   linkend="guc-jit-above-cost"/> GUC. If the cost is higher,
+   <acronym>JIT</acronym> compilation will be performed.
+  </para>
+
+  <para>
+   If the planner, based on the above criterion, decided that
+   <acronym>JIT</acronym> is beneficial, two further decisions are
+   made. Firstly, if the query is more costly than the <xref
+   linkend="guc-jit-optimize-above-cost"/> GUC, expensive optimizations are
+   used to improve the generated code. Secondly, if the query is more costly
+   than the <xref linkend="guc-jit-inline-above-cost"/> GUC, short functions
+   and operators used in the query will be inlined.  Both of these operations
+   increase the <acronym>JIT</acronym> overhead, but can reduce query
+   execution time considerably.
+  </para>
+
+  <para>
+   This cost based decision will be made at plan time, not execution
+   time. This means that when prepared statements are in use, and the generic
+   plan is used (see <xref linkend="sql-prepare-notes"/>), the values of the
+   GUCs set at prepare time take effect, not the settings at execution time.
+  </para>
+
+  <note>
+   <para>
+    If <xref linkend="guc-jit"/> is set to <literal>off</literal>, or no
+    <acronym>JIT</acronym> implementation is available (for example because
+    the server was compiled without <literal>--with-llvm</literal>),
+    <acronym>JIT</acronym> will not be performed, even if considered to be
+    beneficial based on the above criteria.  Setting <xref linkend="guc-jit"/>
+    to <literal>off</literal> takes effect both at plan and at execution time.
+   </para>
+  </note>
+
+  <para>
+   <xref linkend="sql-explain"/> can be used to see whether
+   <acronym>JIT</acronym> is used or not.  As an example, here is a query that
+   is not using <acronym>JIT</acronym>:
+   <programlisting>
+=# EXPLAIN ANALYZE SELECT SUM(relpages) FROM pg_class;
+âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
+â                                                 QUERY PLAN                                                  â
+âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ¤
+â Aggregate  (cost=16.27..16.29 rows=1 width=8) (actual time=0.303..0.303 rows=1 loops=1)                     â
+â   ->  Seq Scan on pg_class  (cost=0.00..15.42 rows=342 width=4) (actual time=0.017..0.111 rows=356 loops=1) â
+â Planning Time: 0.116 ms                                                                                     â
+â Execution Time: 0.365 ms                                                                                    â
+âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
+(4 rows)
+   </programlisting>
+   Given the cost of the plan, it is entirely reasonable that no
+   <acronym>JIT</acronym> was used; the cost of <acronym>JIT</acronym> would
+   have been bigger than the savings. Adjusting the cost limits will lead to
+   <acronym>JIT</acronym> use:
+   <programlisting>
+=# SET jit_above_cost = 10;
+SET
+=# EXPLAIN ANALYZE SELECT SUM(relpages) FROM pg_class;
+âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
+â                                                 QUERY PLAN                                                  â
+âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ¤
+â Aggregate  (cost=16.27..16.29 rows=1 width=8) (actual time=6.049..6.049 rows=1 loops=1)                     â
+â   ->  Seq Scan on pg_class  (cost=0.00..15.42 rows=342 width=4) (actual time=0.019..0.052 rows=356 loops=1) â
+â Planning Time: 0.133 ms                                                                                     â
+â JIT:                                                                                                        â
+â   Functions: 3                                                                                              â
+â   Generation Time: 1.259 ms                                                                                 â
+â   Inlining: false                                                                                           â
+â   Inlining Time: 0.000 ms                                                                                   â
+â   Optimization: false                                                                                       â
+â   Optimization Time: 0.797 ms                                                                               â
+â   Emission Time: 5.048 ms                                                                                   â
+â Execution Time: 7.416 ms                                                                                    â
+âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
+   </programlisting>
+   As visible here, <acronym>JIT</acronym> was used, but inlining and
+   optimization were not. If <xref linkend="guc-jit-optimize-above-cost"/> and
+   <xref linkend="guc-jit-inline-above-cost"/> were lowered, just like <xref
+   linkend="guc-jit-above-cost"/>, that would change.
+  </para>
+ </sect1>
+
+ <sect1 id="jit-configuration" xreflabel="JIT Configuration">
+  <title>Configuration</title>
+
+  <para>
+   <xref linkend="guc-jit"/> determines whether <acronym>JIT</acronym> is
+   enabled or disabled.
+  </para>
+
+  <para>
+   As explained in <xref linkend="jit-decision"/> the configuration variables
+   <xref linkend="guc-jit-above-cost"/>, <xref
+   linkend="guc-jit-optimize-above-cost"/>, and <xref
+   linkend="guc-jit-inline-above-cost"/> decide whether <acronym>JIT</acronym>
+   compilation is performed for a query, and how much effort is spent doing
+   so.
+  </para>
+
+  <para>
+   For development and debugging purposes a few additional GUCs exist. <xref
+   linkend="guc-jit-dump-bitcode"/> allows the generated bitcode to be
+   inspected. <xref linkend="guc-jit-debugging-support"/> allows GDB to see
+   generated functions. <xref linkend="guc-jit-profiling-support"/> emits
+   information so the <productname>perf</productname> profiler can interpret
+   <acronym>JIT</acronym> generated functions sensibly.
+  </para>
+
+  <para>
+   <xref linkend="guc-jit-provider"/> determines which <acronym>JIT</acronym>
+   implementation is used. It rarely needs to be changed. See <xref
+   linkend="jit-pluggable"/>.
+  </para>
+ </sect1>
+
+ <sect1 id="jit-extensibility" xreflabel="JIT Extensibility">
+  <title>Extensibility</title>
+
+  <sect2 id="jit-extensibility-bitcode">
+   <title>Inlining Support for Extensions</title>
+   <para>
+    <productname>PostgreSQL</productname>'s <acronym>JIT</acronym>
+    implementation can inline the implementation of operators and functions
+    (of type <literal>C</literal> and <literal>internal</literal>). See <xref
+    linkend="jit-inlining"/>. To do so for functions in extensions, the
+    definition of these functions needs to be made available. When using <link
+    linkend="extend-pgxs">PGXS</link> to build an extension against a server
+    that has been compiled with LLVM support, the relevant files will be
+    installed automatically.
+   </para>
+
+   <para>
+    The relevant files have to be installed into
+    <filename>$pkglibdir/bitcode/$extension/</filename> and a summary of them
+    to <filename>$pkglibdir/bitcode/$extension.index.bc</filename>, where
+    <literal>$pkglibdir</literal> is the directory returned by
+    <literal>pg_config --pkglibdir</literal> and <literal>$extension</literal>
+    the basename of the extension's shared library.
+
+    <note>
+     <para>
+      For functions built into <productname>PostgreSQL</productname> itself,
+      the bitcode is installed into
+      <literal>$pkglibdir/bitcode/postgres</literal>.
+     </para>
+    </note>
+   </para>
+  </sect2>
+
+  <sect2 id="jit-pluggable">
+   <title>Pluggable <acronym>JIT</acronym> Provider</title>
+
+   <para>
+    <productname>PostgreSQL</productname> provides a <acronym>JIT</acronym>
+    implementation based on <productname>LLVM</productname>.  The interface to
+    the <acronym>JIT</acronym> provider is pluggable and the provider can be
+    changed without recompiling. The provider is chosen via the <xref
+    linkend="guc-jit-provider"/> <acronym>GUC</acronym>.
+   </para>
+
+   <sect3>
+    <title><acronym>JIT</acronym> Provider Interface</title>
+    <para>
+     A <acronym>JIT</acronym> provider is loaded by dynamically loading the
+     named shared library. The normal library search path is used to locate
+     the library. To provide the required <acronym>JIT</acronym> provider
+     callbacks and to indicate that the library is actually a
+     <acronym>JIT</acronym> provider it needs to provide a function named
+     <function>_PG_jit_provider_init</function>. This function is passed a
+     struct that needs to be filled with the callback function pointers for
+     individual actions.
+     <programlisting>
+struct JitProviderCallbacks
+{
+    JitProviderResetAfterErrorCB reset_after_error;
+    JitProviderReleaseContextCB release_context;
+    JitProviderCompileExprCB compile_expr;
+};
+extern void _PG_jit_provider_init(JitProviderCallbacks *cb);
+     </programlisting>
+    </para>
+   </sect3>
+  </sect2>
+ </sect1>
+
+</chapter>
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 054347b17d..0070603fc3 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -163,6 +163,7 @@
   &diskusage;
   &wal;
   &logical-replication;
+  &jit;
   &regress;
 
  </part>
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index c0e548fa5b..70a822e059 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -875,7 +875,7 @@ data. Empty in ordinary tables.</entry>
   <filename>src/include/storage/bufpage.h</filename>.
  </para>
 
- <para>
+ <para id="heaptuple">
 
   Following the page header are item identifiers
   (<type>ItemIdData</type>), each requiring four bytes.
diff --git a/src/backend/jit/README b/src/backend/jit/README
new file mode 100644
index 0000000000..b37dcbe0c1
--- /dev/null
+++ b/src/backend/jit/README
@@ -0,0 +1,289 @@
+What is Just-in-Time Compilation?
+=================================
+
+Just-in-Time compilation (JIT) is the process of turning some form of
+interpreted program evaluation into a native program, and doing so at
+runtime.
+
+For example, instead of using a facility that can evaluate arbitrary
+SQL expressions to evaluate an SQL predicate like WHERE a.col = 3, it
+is possible to generate a function that can be natively executed by
+the CPU that just handles that expression, yielding a speedup.
+
+That this is done at query execution time, possibly even only in cases
+where the relevant task is done a number of times, makes it JIT, rather
+than ahead-of-time (AOT). Given the way JIT compilation is used in
+postgres, the lines between interpretation, AOT and JIT are somewhat
+blurry.
+
+Note that the interpreted program turned into a native program does
+not necessarily have to be a program in the classical sense. E.g. it
+is highly beneficial to JIT compile tuple deforming into a native
+function just handling a specific type of table, despite tuple
+deforming not commonly being understood as a "program".
+
+
+Why JIT?
+========
+
+Parts of postgres are commonly bottlenecked by comparatively small
+pieces of CPU intensive code. In a number of cases that is because the
+relevant code has to be very generic (e.g. handling arbitrary SQL
+level expressions, over arbitrary tables, with arbitrary extensions
+installed). This often leads to a large number of indirect jumps and
+unpredictable branches, and generally a high number of instructions
+for a given task. E.g. just evaluating an expression comparing a
+column in a database to an integer ends up needing several hundred
+cycles.
+
+By generating native code, large numbers of indirect jumps can be
+removed by either making them into direct branches (e.g. replacing the
+indirect call to an SQL operator's implementation with a direct call
+to that function), or by removing them entirely (e.g. by evaluating the
+branch at compile time because the input is constant). Similarly a lot
+of branches can be entirely removed (e.g. by again evaluating the
+branch at compile time because the input is constant). The latter is
+particularly beneficial for removing branches during tuple deforming.
+
+
+How to JIT
+==========
+
+Postgres, by default, uses LLVM to perform JIT. LLVM was chosen
+because it is developed by several large corporations and therefore
+unlikely to be discontinued, because it has a license compatible with
+PostgreSQL, and because its LLVM IR can be generated from C
+using the clang compiler.
+
+
+Shared Library Separation
+-------------------------
+
+To avoid the main PostgreSQL binary directly depending on LLVM, which
+would prevent LLVM support being independently installed by OS package
+managers, the LLVM dependent code is located in a shared library that
+is loaded on-demand.
+
+An additional benefit of doing so is that it is relatively easy to
+evaluate JIT compilation that does not use LLVM, by changing out the
+shared library used to provide JIT compilation.
+
+To achieve this, code intending to perform JIT (e.g. expression
+evaluation) calls an LLVM independent wrapper located in jit.c to do
+so. If the shared library providing JIT support can be loaded
+(i.e. postgres was compiled with LLVM support and the shared library is
+installed), the task of JIT compiling an expression gets handed off to
+the shared library. This obviously requires that the function in jit.c
+is allowed to fail in case no JIT provider can be loaded.
+
+Which shared library is loaded is determined by the jit_provider GUC,
+defaulting to "llvmjit".
+
+Cloistering code performing JIT into a shared library unfortunately
+also means that code doing JIT compilation for various parts of code
+has to be located separately from the code doing so without
+JIT. E.g. the JITed version of execExprInterp.c is located in
+jit/llvm/ rather than executor/.
+
+
+JIT Context
+-----------
+
+For performance and convenience reasons it is useful to allow JITed
+functions to be emitted and deallocated together. It is e.g. very
+common to create a number of functions at query initialization time,
+use them during query execution, and then deallocate all of them
+together at the end of the query.
+
+Lifetimes of JITed functions are managed via JITContext. Exactly one
+such context should be created for work in which all created JITed
+functions should have the same lifetime. E.g. there's exactly one
+JITContext for each query executed, in the query's EState.  Only the
+release of a JITContext is exposed to the provider independent
+facility, as the creation of one is done on-demand by the JIT
+implementations.
+
+Emitting individual functions separately is more expensive than
+emitting several functions at once, and emitting them together can
+provide additional optimization opportunities. To facilitate that the
+LLVM provider separates function definition from emitting them in an
+executable way.
+
+Creating functions into the current mutable module (a module
+essentially is LLVM's equivalent of a translation unit in C) is done
+using
+  extern LLVMModuleRef llvm_mutable_module(LLVMJitContext *context);
+in which it then can emit as much code using the LLVM APIs as it
+wants. Whenever a function actually needs to be called
+  extern void *llvm_get_function(LLVMJitContext *context, const char *funcname);
+returns a pointer to it.
+
+E.g. in the expression evaluation case this setup allows most
+functions in a query to be defined during ExecInitNode(), delaying the
+function emission until the first time a function is actually
+used.
+
+
+Error Handling
+--------------
+
+There are two aspects to error handling.  Firstly, generated (LLVM IR)
+and emitted functions (mmap()ed segments) need to be cleaned up both
+after a successful query execution and after an error. This is done by
+registering each created JITContext with the current resource owner,
+and cleaning it up on error / end of transaction. If it is desirable
+to release resources earlier, jit_release_context() can be used.
+
+The second, less pretty, aspect of error handling is OOM handling
+inside LLVM itself. The above resowner based mechanism takes care of
+cleaning up emitted code upon ERROR, but there's also the chance that
+LLVM itself runs out of memory. LLVM by default does *not* use any C++
+exceptions. Its allocations are primarily funneled through the
+standard "new" handlers, and some direct use of malloc() and
+mmap(). For the former a 'new handler' exists
+(http://en.cppreference.com/w/cpp/memory/new/set_new_handler); for the
+latter LLVM provides callbacks that get called upon failure
+(unfortunately mmap() failures are treated as fatal rather than OOM
+errors).  What we've, for now, chosen to do, is to have two functions
+that LLVM using code must use:
+extern void llvm_enter_fatal_on_oom(void);
+extern void llvm_leave_fatal_on_oom(void);
+before interacting with LLVM code.
+
+When a libstdc++ new or LLVM error occurs, the handlers set up by the
+above functions trigger a FATAL error. We have to use FATAL rather
+than ERROR, as we *cannot* reliably throw ERROR inside a foreign
+library without risking corrupting its internal state.
+
+Users of the above sections do *not* have to use PG_TRY/CATCH blocks;
+the handlers are instead reset at the toplevel sigsetjmp() level.
+
+Using a relatively small enter/leave protected section of code, rather
+than setting up these handlers globally, avoids negative interactions
+with extensions that might use C++ like e.g. postgis. As LLVM code
+generation should never execute arbitrary code, just setting these
+handlers temporarily ought to suffice.
+
+
+Type Synchronization
+--------------------
+
+To be able to generate code performing tasks that are done in "interpreted"
+postgres, it obviously is required that code generation knows about at
+least a few postgres types.  While it is possible to inform LLVM about
+type definitions by recreating them manually in C code, that is failure
+prone and labor intensive.
+
+Instead there is one small file (llvmjit_types.c) which references each of
+the types required for JITing. That file is translated to bitcode at
+compile time, and loaded when LLVM is initialized in a backend.
+
+That works very well to synchronize the type definition; unfortunately
+it does *not* synchronize offsets as the IR level representation doesn't
+know field names.  Instead required offsets are maintained as defines in
+the original struct definition. E.g.
+#define FIELDNO_TUPLETABLESLOT_NVALID 9
+        int                     tts_nvalid;             /* # of valid values in tts_values */
+While that still needs to be defined, it's only required for a
+relatively small number of fields, and it's bunched together with the
+struct definition, so it's easily kept synchronized.
+
+
+Inlining
+--------
+
+One big advantage of JITing expressions is that it can significantly
+reduce the overhead of postgres's extensible function/operator
+mechanism, by inlining the body of called functions / operators.
+
+It obviously is undesirable to maintain a second implementation of
+commonly used functions, just for inlining purposes. Instead we take
+advantage of the fact that the clang compiler can emit LLVM IR.
+
+The ability to do so allows us to get the LLVM IR for all operators
+(e.g. int8eq, float8pl etc), without maintaining two copies.  These
+bitcode files get installed into the server's
+  $pkglibdir/bitcode/postgres/
+Using existing LLVM functionality (for parallel LTO compilation),
+additionally an index over these is stored to
+$pkglibdir/bitcode/postgres.index.bc
+
+Similarly extensions can install code into
+  $pkglibdir/bitcode/[extension]/
+accompanied by
+  $pkglibdir/bitcode/[extension].index.bc
+
+just alongside the actual library.  An extension's index will be used
+to look up symbols when located in the corresponding shared
+library. Symbols that are used inside the extension, when inlined,
+will be first looked up in the main binary and then the extension's.
+
+
+Caching
+-------
+
+Currently it is not yet possible to cache generated functions, even
+though that'd be desirable from a performance point of view. The
+problem is that the generated functions commonly contain pointers into
+per-execution memory. The expression evaluation functionality needs to
+be redesigned a bit to avoid that. Basically all per-execution memory
+needs to be referenced as an offset to one block of memory stored in
+an ExprState, rather than absolute pointers into memory.
+
+Once that is addressed, adding an LRU cache that's keyed by the
+generated LLVM IR will allow the use of optimized functions even for
+shorter queries.
+
+A longer term project is to move expression compilation to the planner
+stage, allowing e.g. to tie compiled expressions to prepared statements.
+
+What to JIT
+===========
+
+Currently expression evaluation and tuple deforming are JITed. Those
+were chosen because they commonly are major CPU bottlenecks in
+analytics queries, but are by no means the only potentially beneficial cases.
+
+For JITing to be beneficial a piece of code first and foremost has to
+be a CPU bottleneck. But also importantly, JITing can only be
+beneficial if overhead can be removed by doing so. E.g. in the tuple
+deforming case the knowledge about the number of columns and their
+types can remove a significant number of branches, and in the
+expression evaluation case a lot of indirect jumps/calls can be
+removed.  If neither of these is the case, JITing is a waste of
+resources.
+
+Future avenues for JITing are tuple sorting, COPY parsing/output
+generation, and later compiling larger parts of queries.
+
+
+When to JIT
+===========
+
+Currently there are a number of GUCs that influence JITing:
+
+- jit_above_cost = -1, 0-DBL_MAX - all queries with a higher total cost
+  get JITed, *without* optimization (expensive part), corresponding to
+  -O0. This commonly already results in significant speedups if
+  expression/deforming is a bottleneck (removing dynamic branches
+  mostly).
+- jit_optimize_above_cost = -1, 0-DBL_MAX - all queries with a higher total cost
+  get JITed, *with* optimization (expensive part).
+- jit_inline_above_cost = -1, 0-DBL_MAX - inlining is tried if query has
+  higher cost.
+
+Whenever a query's total cost is above these limits, JITing is
+performed.
+
+Alternative costing models, e.g. by generating separate paths for
+parts of a query with lower cpu_* costs, are also a possibility, but
+it's doubtful the benefit would justify the overhead of doing so.  Another
+alternative would be to count the number of times individual
+expressions are estimated to be evaluated, and perform JITing of these
+individual expressions.
+
+The obvious seeming approach of JITing expressions individually after
+a number of executions turns out not to work too well. Primarily
+because emitting many small functions individually has significant
+overhead. Secondarily because the time till JITing occurs causes
+relative slowdowns that eat into the gain of JIT compilation.
-- 
2.50.0