]> granicus.if.org Git - postgresql/commitdiff
Add inlining support to LLVM JIT provider.
authorAndres Freund <andres@anarazel.de>
Wed, 28 Mar 2018 20:19:08 +0000 (13:19 -0700)
committerAndres Freund <andres@anarazel.de>
Wed, 28 Mar 2018 20:19:08 +0000 (13:19 -0700)
This provides infrastructure to allow JITed code to inline code
implemented in C. This e.g. can be postgres internal functions or
extension code.

This already speeds up long running queries, by allowing the LLVM
optimizer to optimize across function boundaries. The optimization
potential currently doesn't reach its full potential because LLVM
cannot optimize the FunctionCallInfoData argument fully away, because
it's allocated on the heap rather than the stack. Fixing that is
beyond what's realistic for v11.

To be able to do that, use CLANG to convert C code to LLVM bitcode,
and have LLVM build a summary for it. That bitcode can then be used
to inline functions at runtime. For that the bitcode needs to be
installed. Postgres bitcode goes into $pkglibdir/bitcode/postgres,
extensions go into equivalent directories.  PGXS has been modified so
that happens automatically if postgres has been compiled with LLVM
support.

Currently this isn't the fastest inline implementation, modules are
reloaded from disk during inlining. That's to work around an apparent
LLVM bug, triggering an apparently spurious error in LLVM assertion
enabled builds.  Once that is resolved we can remove the superfluous
read from disk.

Docs will follow in a later commit containing docs for the whole JIT
feature.

Author: Andres Freund
Discussion: https://postgr.es/m/20170901064131.tazjxwus3k2w3ybh@alap3.anarazel.de

13 files changed:
src/Makefile.global.in
src/backend/Makefile
src/backend/common.mk
src/backend/jit/jit.c
src/backend/jit/llvm/Makefile
src/backend/jit/llvm/llvmjit.c
src/backend/jit/llvm/llvmjit_inline.cpp [new file with mode: 0644]
src/backend/optimizer/plan/planner.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/jit/jit.h
src/include/jit/llvmjit.h
src/makefiles/pgxs.mk

index 859adfc3cb0257f15ae0348da05f906d1f488a04..04cace1017ccaa15f7cf7d37fbb17960f58af510 100644 (file)
@@ -171,6 +171,7 @@ endif # PGXS
 includedir_server = $(pkgincludedir)/server
 includedir_internal = $(pkgincludedir)/internal
 pgxsdir = $(pkglibdir)/pgxs
+bitcodedir = $(pkglibdir)/bitcode
 
 
 ##########################################################################
@@ -972,3 +973,36 @@ endif
 
 %.bc : %.cpp
        $(COMPILE.cxx.bc) -o $@ $<
+
+# Install LLVM bitcode module (for JITing).
+#
+# The arguments are:
+# $(1) name of the module (e.g. an extension's name or postgres for core code)
+# $(2) source objects, with .o suffix
+#
+# The .bc file corresponding to each object is copied below
+# $(DESTDIR)$(bitcodedir)/$(1), keeping the object's relative directory, and
+# llvm-lto's thinlink action then writes a combined index for them to
+# $(DESTDIR)$(bitcodedir)/$(1).index.bc.
+#
+define install_llvm_module
+# Create target directory
+$(MKDIR_P) "$(DESTDIR)${bitcodedir}/$(1)"
+# Create sub-directories, if files are in subdirectories
+$(MKDIR_P) $(sort $(dir $(addprefix $(DESTDIR)${bitcodedir}/$(1)/, $(2))))
+# Then install files
+#
+# The many INSTALL_DATA invocations aren't particularly fast, it'd be
+# good if we could coalesce them, but I didn't find a good way.
+#
+# The destination is spelled exactly like the directory created above
+# (no extra slash after DESTDIR), so an empty DESTDIR doesn't yield a
+# path starting with "//".
+$(foreach obj, ${2}, $(INSTALL_DATA) $(patsubst %.o,%.bc, $(obj)) "$(DESTDIR)${bitcodedir}/$(1)/$(dir $(obj))";
+)
+# and generate index
+(cd "$(DESTDIR)${bitcodedir}" && $(LLVM_BINPATH)/llvm-lto -thinlto -thinlto-action=thinlink -o $(1).index.bc $(addprefix $(1)/,$(patsubst %.o,%.bc, $(2))))
+endef
+
+# Uninstall LLVM bitcode module: removes the module's bitcode directory and its index file.
+#
+# The arguments are:
+# $(1) name of the module (e.g. an extension's name or postgres for core code); must not be empty, or the rm -rf below targets the whole bitcode directory
+#
+# This intentionally removes the whole directory instead of using the explicit
+# list of installed files, as that list seems too likely to change regularly.
+define uninstall_llvm_module
+rm -rf "$(DESTDIR)${bitcodedir}/$(1)/"
+rm -f "$(DESTDIR)${bitcodedir}/$(1).index.bc"
+endef
index ca230de2f3f54e3a3901a185163048a0393d9361..21b094385f626ff8008de0a2544e525299fc9011 100644 (file)
@@ -252,6 +252,13 @@ endif
        $(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample'
        $(INSTALL_DATA) $(srcdir)/access/transam/recovery.conf.sample '$(DESTDIR)$(datadir)/recovery.conf.sample'
 
+ifeq ($(with_llvm), yes)
+# When built with LLVM, installing the server binary also installs the
+# bitcode of the backend objects, for use by JIT inlining.
+install-bin: install-postgres-bitcode
+
+# install-postgres-bitcode names an action, not a file; declare it phony so
+# a stray file of that name can't suppress the installation.
+.PHONY: install-postgres-bitcode
+install-postgres-bitcode: $(OBJS) all
+       $(call install_llvm_module,postgres,$(call expand_subsys, $(filter-out $(top_builddir)/src/timezone/objfiles.txt, $(SUBDIROBJS))))
+endif
+
 install-bin: postgres $(POSTGRES_IMP) installdirs
        $(INSTALL_PROGRAM) postgres$(X) '$(DESTDIR)$(bindir)/postgres$(X)'
 ifneq ($(PORTNAME), win32)
@@ -309,6 +316,9 @@ endif
              '$(DESTDIR)$(datadir)/pg_ident.conf.sample' \
              '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
              '$(DESTDIR)$(datadir)/recovery.conf.sample'
+ifeq ($(with_llvm), yes)
+       $(call uninstall_llvm_module,postgres)
+endif
 
 
 ##########################################################################
index 6eaa353aea9fc0d14301a8e4760d4de35b4b36de..08e7eff6c87b6575e4e747f94f5a843f7e5e9e48 100644 (file)
@@ -30,6 +30,10 @@ objfiles.txt: Makefile $(SUBDIROBJS) $(OBJS)
 # Don't rebuild the list if only the OBJS have changed.
        $(if $(filter-out $(OBJS),$?),( $(if $(SUBDIROBJS),cat $(SUBDIROBJS); )echo $(addprefix $(subdir)/,$(OBJS)) ) >$@,touch $@)
 
+ifeq ($(with_llvm), yes)
+# With LLVM support, the .bc counterpart of every object is a prerequisite of
+# the object list as well.
+objfiles.txt: $(OBJS:.o=.bc)
+endif
+
 # make function to expand objfiles.txt contents
 expand_subsys = $(foreach file,$(1),$(if $(filter %/objfiles.txt,$(file)),$(patsubst ../../src/backend/%,%,$(addprefix $(top_builddir)/,$(shell cat $(file)))),$(file)))
 
@@ -43,7 +47,7 @@ $(SUBDIRS:%=%-recursive):
 $(call recurse,clean)
 clean: clean-local
 clean-local:
-       rm -f $(subsysfilename) $(OBJS)
+       rm -f $(subsysfilename) $(OBJS) $(patsubst %.o,%.bc, $(OBJS))
 
 $(call recurse,coverage)
 $(call recurse,install)
index 67a015fb35e24ef046a6682809b3c38406b33fab..c1703094db71e2636180e61ce870c395dddb8636 100644 (file)
@@ -40,6 +40,7 @@ bool          jit_expressions = true;
 bool           jit_profiling_support = false;
 bool           jit_tuple_deforming = true;
 double         jit_above_cost = 100000;
+double         jit_inline_above_cost = 500000;
 double         jit_optimize_above_cost = 500000;
 
 static JitProviderCallbacks provider;
index d6a1f5f02dbc4ae26f8083ddd60be559ac6d9194..d7a36d73717999659824ddce8cdb145163eb4ff8 100644 (file)
@@ -37,7 +37,7 @@ override COMPILER = $(CXX) $(CFLAGS)
 OBJS=$(WIN32RES)
 
 # Infrastructure
-OBJS += llvmjit.o llvmjit_error.o llvmjit_wrap.o
+OBJS += llvmjit.o llvmjit_error.o llvmjit_inline.o llvmjit_wrap.o
 # Code generation
 OBJS += llvmjit_expr.o llvmjit_deform.o
 
index 5a33e52e1dce15326ca11bfd3c92df59231be22e..daae964b1cea26bddcee1bc9455b60394ed4d8fa 100644 (file)
@@ -468,6 +468,10 @@ llvm_optimize_module(LLVMJitContext *context, LLVMModuleRef module)
        /* always use always-inliner pass */
        if (!(context->base.flags & PGJIT_OPT3))
                LLVMAddAlwaysInlinerPass(llvm_mpm);
+       /* if doing inlining, but no expensive optimization, add inlining pass */
+       if (context->base.flags & PGJIT_INLINE
+               && !(context->base.flags & PGJIT_OPT3))
+               LLVMAddFunctionInliningPass(llvm_mpm);
        LLVMRunPassManager(llvm_mpm, context->module);
        LLVMDisposePassManager(llvm_mpm);
 
@@ -491,6 +495,16 @@ llvm_compile_module(LLVMJitContext *context)
        else
                compile_orc = llvm_opt0_orc;
 
+       /* perform inlining */
+       if (context->base.flags & PGJIT_INLINE)
+       {
+               INSTR_TIME_SET_CURRENT(starttime);
+               llvm_inline(context->module);
+               INSTR_TIME_SET_CURRENT(endtime);
+               INSTR_TIME_ACCUM_DIFF(context->base.inlining_counter,
+                                                         endtime, starttime);
+       }
+
        if (jit_dump_bitcode)
        {
                char       *filename;
@@ -578,7 +592,8 @@ llvm_compile_module(LLVMJitContext *context)
        MemoryContextSwitchTo(oldcontext);
 
        ereport(DEBUG1,
-                       (errmsg("time to opt: %.3fs, emit: %.3fs",
+                       (errmsg("time to inline: %.3fs, opt: %.3fs, emit: %.3fs",
+                                       INSTR_TIME_GET_DOUBLE(context->base.inlining_counter),
                                        INSTR_TIME_GET_DOUBLE(context->base.optimization_counter),
                                        INSTR_TIME_GET_DOUBLE(context->base.emission_counter)),
                         errhidestmt(true),
diff --git a/src/backend/jit/llvm/llvmjit_inline.cpp b/src/backend/jit/llvm/llvmjit_inline.cpp
new file mode 100644 (file)
index 0000000..130e2ab
--- /dev/null
@@ -0,0 +1,877 @@
+/*-------------------------------------------------------------------------
+ *
+ * llvmjit_inline.cpp
+ *       Cross module inlining suitable for postgres' JIT
+ *
+ * The inliner iterates over external functions referenced from the passed
+ * module and attempts to inline those.  It does so by utilizing pre-built
+ * indexes over both postgres core code and extension modules.  When a match
+ * for an external function is found - not guaranteed! - the index will then
+ * be used to judge their instruction count / inline worthiness. After doing
+ * so for all external functions, all the referenced functions (and
+ * prerequisites) will be imported.
+ *
+ * Copyright (c) 2016-2018, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       src/backend/jit/llvm/llvmjit_inline.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+extern "C"
+{
+#include "postgres.h"
+}
+
+#include "jit/llvmjit.h"
+
+extern "C"
+{
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "common/string.h"
+#include "miscadmin.h"
+#include "storage/fd.h"
+}
+
+#include <llvm-c/Core.h>
+#include <llvm-c/BitReader.h>
+
+#include <llvm/ADT/SetVector.h>
+#include <llvm/ADT/StringSet.h>
+#include <llvm/ADT/StringMap.h>
+#include <llvm/Analysis/ModuleSummaryAnalysis.h>
+#if LLVM_VERSION_MAJOR > 3
+#include <llvm/Bitcode/BitcodeReader.h>
+#else
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Support/Error.h>
+#endif
+#include <llvm/IR/Attributes.h>
+#include <llvm/IR/CallSite.h>
+#include <llvm/IR/DebugInfo.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/ModuleSummaryIndex.h>
+#include <llvm/Linker/IRMover.h>
+#include <llvm/Support/ManagedStatic.h>
+
+
+/*
+ * Type used to represent modules InlineWorkListItem's subject is searched for
+ * in.
+ */
+typedef llvm::SmallVector<llvm::ModuleSummaryIndex *, 2> InlineSearchPath;
+
+/*
+ * Entry of the inlining worklist: a symbol that still has to be checked,
+ * together with the summary indexes it is to be looked up in.
+ * InlineWorkList is the queue of such entries.
+ */
+struct InlineWorkListItem
+{
+       llvm::StringRef symbolName;
+       InlineSearchPath searchpath;
+};
+typedef llvm::SmallVector<InlineWorkListItem, 128> InlineWorkList;
+
+/*
+ * Information about symbols processed during inlining. Used to prevent
+ * repeated searches and provide additional information.
+ */
+typedef struct FunctionInlineState
+{
+       int costLimit;                  /* instruction count budget candidate definitions are compared against */
+       bool processed;                 /* set once the worklist loop is done with the symbol */
+       bool inlined;                   /* set when a definition was selected for import */
+       bool allowReconsidering;        /* set when a candidate definition exceeded costLimit */
+} FunctionInlineState;
+typedef llvm::StringMap<FunctionInlineState> FunctionInlineStates;
+
+/*
+ * Map of modules that should be inlined, with a list of the to-be inlined
+ * symbols.
+ */
+typedef llvm::StringMap<llvm::StringSet<> > ImportMapTy;
+
+
+const float inline_cost_decay_factor = 0.5;    /* function_inlinable() multiplies its threshold by this to get subThreshold */
+const int inline_initial_cost = 150;           /* costLimit given to functions the module references directly */
+
+/*
+ * These are managed statics so LLVM knows to deallocate them during an
+ * LLVMShutdown(), rather than after (which'd cause crashes).
+ */
+typedef llvm::StringMap<std::unique_ptr<llvm::Module> > ModuleCache;
+llvm::ManagedStatic<ModuleCache> module_cache;
+typedef llvm::StringMap<std::unique_ptr<llvm::ModuleSummaryIndex> > SummaryCache;
+llvm::ManagedStatic<SummaryCache> summary_cache;
+
+
+static std::unique_ptr<ImportMapTy> llvm_build_inline_plan(llvm::Module *mod);
+static void llvm_execute_inline_plan(llvm::Module *mod,
+                                                                        ImportMapTy *globalsToInline);
+
+static llvm::Module* load_module_cached(llvm::StringRef modPath);
+static std::unique_ptr<llvm::Module> load_module(llvm::StringRef Identifier);
+static std::unique_ptr<llvm::ModuleSummaryIndex> llvm_load_summary(llvm::StringRef path);
+
+
+static llvm::Function* create_redirection_function(std::unique_ptr<llvm::Module> &importMod,
+                                                                                                  llvm::Function *F,
+                                                                                                  llvm::StringRef Name);
+
+static bool function_inlinable(llvm::Function &F,
+                                                          int threshold,
+                                                          FunctionInlineStates &functionState,
+                                                          InlineWorkList &worklist,
+                                                          InlineSearchPath &searchpath,
+                                                          llvm::SmallPtrSet<const llvm::Function *, 8> &visitedFunctions,
+                                                          int &running_instcount,
+                                                          llvm::StringSet<> &importVars);
+static void function_references(llvm::Function &F,
+                                                               int &running_instcount,
+                                                               llvm::SmallPtrSet<llvm::GlobalVariable *, 8> &referencedVars,
+                                                               llvm::SmallPtrSet<llvm::Function *, 8> &referencedFunctions);
+
+static void add_module_to_inline_search_path(InlineSearchPath& path, llvm::StringRef modpath);
+static llvm::SmallVector<llvm::GlobalValueSummary *, 1>
+summaries_for_guid(const InlineSearchPath& path, llvm::GlobalValue::GUID guid);
+
+/* verbose debugging for inliner development */
+/* #define INLINE_DEBUG */
+#ifdef INLINE_DEBUG
+#define ilog           elog
+#else
+#define ilog(...)      (void) 0
+#endif
+
+/*
+ * Inline external functions referenced by M, as far as a simple cost based
+ * analysis selects them.
+ */
+void
+llvm_inline(LLVMModuleRef M)
+{
+       llvm::Module *target = llvm::unwrap(M);
+       std::unique_ptr<ImportMapTy> plan = llvm_build_inline_plan(target);
+
+       /* without a plan there is nothing to import */
+       if (plan)
+               llvm_execute_inline_plan(target, plan.get());
+}
+
+/*
+ * Build information necessary for inlining external function references in
+ * mod.
+ */
+static std::unique_ptr<ImportMapTy>
+llvm_build_inline_plan(llvm::Module *mod)
+{
+       std::unique_ptr<ImportMapTy> globalsToInline = llvm::make_unique<ImportMapTy>();
+       FunctionInlineStates functionStates;
+       InlineWorkList worklist;
+
+       InlineSearchPath defaultSearchPath;
+
+       /* attempt to add module to search path */
+       add_module_to_inline_search_path(defaultSearchPath, "$libdir/postgres");
+       /* if postgres isn't available, no point continuing */
+       if (defaultSearchPath.empty())
+               return nullptr;
+
+       /*
+        * Start inlining with current references to external functions by putting
+        * them on the inlining worklist. If, during inlining of those, new extern
+        * functions need to be inlined, they'll also be put there, with a lower
+        * priority.
+        */
+       for (const llvm::Function &funcDecl : mod->functions())
+       {
+               InlineWorkListItem item = {};
+               FunctionInlineState inlineState = {};
+
+               /* already has a definition */
+               if (!funcDecl.isDeclaration())
+                       continue;
+
+               /* llvm provides implementation */
+               if (funcDecl.isIntrinsic())
+                       continue;
+
+               item.symbolName = funcDecl.getName();
+               item.searchpath = defaultSearchPath;
+               worklist.push_back(item);
+               inlineState.costLimit = inline_initial_cost;
+               inlineState.processed = false;
+               inlineState.inlined = false;
+               inlineState.allowReconsidering = false;
+               functionStates[funcDecl.getName()] = inlineState;
+       }
+
+       /*
+        * Iterate over pending worklist items, look them up in index, check
+        * whether they should be inlined.
+        */
+       while (!worklist.empty())
+       {
+               InlineWorkListItem item = worklist.pop_back_val();
+               llvm::StringRef symbolName = item.symbolName;
+               char *cmodname;
+               char *cfuncname;
+               FunctionInlineState &inlineState = functionStates[symbolName];
+               llvm::GlobalValue::GUID funcGUID;
+
+               llvm_split_symbol_name(symbolName.data(), &cmodname, &cfuncname);
+
+               funcGUID = llvm::GlobalValue::getGUID(cfuncname);
+
+               /* already processed */
+               if (inlineState.processed)
+                       continue;
+
+
+               if (cmodname)
+                       add_module_to_inline_search_path(item.searchpath, cmodname);
+
+               /*
+                * Iterate over all known definitions of function, via the index. Then
+                * look up module(s), check if function actually is defined (there
+                * could be hash conflicts).
+                */
+               for (const auto &gvs : summaries_for_guid(item.searchpath, funcGUID))
+               {
+                       const llvm::FunctionSummary *fs;
+                       llvm::StringRef modPath = gvs->modulePath();
+                       llvm::Module *defMod;
+                       llvm::Function *funcDef;
+
+                       fs = llvm::cast<llvm::FunctionSummary>(gvs);
+
+#if LLVM_VERSION_MAJOR > 3
+                       if (gvs->notEligibleToImport())
+                       {
+                               ilog(DEBUG1, "ineligibile to import %s due to summary",
+                                        symbolName.data());
+                               continue;
+                       }
+#endif
+
+                       if ((int) fs->instCount() > inlineState.costLimit)
+                       {
+                               ilog(DEBUG1, "ineligibile to import %s due to early threshold: %u vs %u",
+                                        symbolName.data(), fs->instCount(), inlineState.costLimit);
+                               inlineState.allowReconsidering = true;
+                               continue;
+                       }
+
+                       defMod = load_module_cached(modPath);
+                       if (defMod->materializeMetadata())
+                               elog(FATAL, "failed to materialize metadata");
+
+                       funcDef = defMod->getFunction(cfuncname);
+
+                       /*
+                        * This can happen e.g. in case of a hash collision of the
+                        * function's name.
+                        */
+                       if (!funcDef)
+                               continue;
+
+                       if (funcDef->materialize())
+                               elog(FATAL, "failed to materialize metadata");
+
+                       Assert(!funcDef->isDeclaration());
+                       Assert(funcDef->hasExternalLinkage());
+
+                       /* don't inline functions marked as noinline */
+                       if (funcDef->getAttributes().hasFnAttribute(llvm::Attribute::NoInline))
+                       {
+                               ilog(DEBUG1, "ineligibile to import %s due to noinline",
+                                        symbolName.data());
+                               continue;
+                       }
+
+                       llvm::StringSet<> importVars;
+                       llvm::SmallPtrSet<const llvm::Function *, 8> visitedFunctions;
+                       int running_instcount = 0;
+
+                       /*
+                        * Check whether function, and objects it depends on, are
+                        * inlinable.
+                        */
+                       if (function_inlinable(*funcDef,
+                                                                  inlineState.costLimit,
+                                                                  functionStates,
+                                                                  worklist,
+                                                                  item.searchpath,
+                                                                  visitedFunctions,
+                                                                  running_instcount,
+                                                                  importVars))
+                       {
+                               /*
+                                * Check whether function and all its dependencies are too
+                                * big. Dependencies already counted for other functions that
+                                * will get inlined are not counted again. While this make
+                                * things somewhat order dependant, I can't quite see a point
+                                * in a different behaviour.
+                                */
+                               if (running_instcount > inlineState.costLimit)
+                               {
+                                       ilog(DEBUG1, "skipping inlining of %s due to late threshold %d vs %d",
+                                                symbolName.data(), running_instcount, inlineState.costLimit);
+                                       inlineState.allowReconsidering = true;
+                                       continue;
+                               }
+
+                               ilog(DEBUG1, "inline top function %s total_instcount: %d, partial: %d",
+                                        symbolName.data(), running_instcount, fs->instCount());
+
+                               /* import referenced function itself */
+                               importVars.insert(symbolName);
+
+                               {
+                                       llvm::StringSet<> &modGlobalsToInline = (*globalsToInline)[modPath];
+                                       for (auto& importVar : importVars)
+                                               modGlobalsToInline.insert(importVar.first());
+                                       Assert(modGlobalsToInline.size() > 0);
+                               }
+
+                               /* mark function as inlined */
+                               inlineState.inlined = true;
+
+                               /*
+                                * Found definition to inline, don't look for further
+                                * potential definitions.
+                                */
+                               break;
+                       }
+                       else
+                       {
+                               ilog(DEBUG1, "had to skip inlining %s",
+                                        symbolName.data());
+
+                               /* It's possible there's another definition that's inlinable. */
+                       }
+               }
+
+               /*
+                * Signal that we're done with symbol, whether successful (inlined =
+                * true above) or not.
+                */
+               inlineState.processed = true;
+       }
+
+       return globalsToInline;
+}
+
+/*
+ * Perform the actual inlining of external functions (and their dependencies)
+ * into mod.
+ */
+static void
+llvm_execute_inline_plan(llvm::Module *mod, ImportMapTy *globalsToInline)
+{
+       llvm::IRMover Mover(*mod);
+
+       for (const auto& toInline : *globalsToInline)
+       {
+               const llvm::StringRef& modPath = toInline.first();
+               const llvm::StringSet<>& modGlobalsToInline = toInline.second;
+               llvm::SetVector<llvm::GlobalValue *> GlobalsToImport;
+
+               Assert(module_cache->count(modPath));
+               std::unique_ptr<llvm::Module> importMod(std::move((*module_cache)[modPath]));
+               module_cache->erase(modPath);
+
+               if (modGlobalsToInline.empty())
+                       continue;
+
+               for (auto &glob: modGlobalsToInline)
+               {
+                       llvm::StringRef SymbolName = glob.first();
+                       char *modname;
+                       char *funcname;
+
+                       llvm_split_symbol_name(SymbolName.data(), &modname, &funcname);
+
+                       llvm::GlobalValue *valueToImport = importMod->getNamedValue(funcname);
+
+                       if (!valueToImport)
+                               elog(FATAL, "didn't refind value %s to import", SymbolName.data());
+
+                       /*
+                        * For functions (global vars are only inlined if already static),
+                        * mark imported variables as being clones from other
+                        * functions. That a) avoids symbol conflicts b) allows the
+                        * optimizer to perform inlining.
+                       */
+                       if (llvm::isa<llvm::Function>(valueToImport))
+                       {
+                               llvm::Function *F = llvm::dyn_cast<llvm::Function>(valueToImport);
+                               typedef llvm::GlobalValue::LinkageTypes LinkageTypes;
+
+                               /*
+                                * Per-function info isn't necessarily stripped yet, as the
+                                * module is lazy-loaded when stripped above.
+                                */
+                               llvm::stripDebugInfo(*F);
+
+                               /*
+                                * If the to-be-imported function is one referenced including
+                                * its module name, create a tiny inline function that just
+                                * forwards the call. One might think a GlobalAlias would do
+                                * the trick, but a) IRMover doesn't override a declaration
+                                * with an alias pointing to a definition (instead renaming
+                                * it), b) Aliases can't be AvailableExternally.
+                                */
+                               if (modname)
+                               {
+                                       llvm::Function *AF;
+
+                                       AF = create_redirection_function(importMod, F, SymbolName);
+
+                                       GlobalsToImport.insert(AF);
+                                       llvm::stripDebugInfo(*AF);
+                               }
+
+                               if (valueToImport->hasExternalLinkage())
+                               {
+                                       valueToImport->setLinkage(LinkageTypes::AvailableExternallyLinkage);
+                               }
+                       }
+
+                       GlobalsToImport.insert(valueToImport);
+                       ilog(DEBUG1, "performing import of %s %s",
+                                modPath.data(), SymbolName.data());
+
+               }
+
+#if LLVM_VERSION_MAJOR > 4
+#define IRMOVE_PARAMS , /*IsPerformingImport=*/false
+#elif LLVM_VERSION_MAJOR > 3
+#define IRMOVE_PARAMS , /*LinkModuleInlineAsm=*/false, /*IsPerformingImport=*/false
+#else
+#define IRMOVE_PARAMS
+#endif
+               if (Mover.move(std::move(importMod), GlobalsToImport.getArrayRef(),
+                                          [](llvm::GlobalValue &, llvm::IRMover::ValueAdder) {}
+                                          IRMOVE_PARAMS))
+                       elog(FATAL, "function import failed with linker error");
+       }
+}
+
+/*
+ * Look up the module identified by modPath in the in-memory cache, loading
+ * and caching it first if it isn't there yet.
+ *
+ * The returned module is owned by the cache and may *not* be modified
+ * without copying, otherwise the cache state would get corrupted.
+ */
+static llvm::Module*
+load_module_cached(llvm::StringRef modPath)
+{
+       auto cached = module_cache->find(modPath);
+
+       /* cache hit */
+       if (cached != module_cache->end())
+               return cached->second.get();
+
+       /* cache miss, load the module and remember it */
+       auto inserted = module_cache->insert(
+               std::make_pair(modPath, load_module(modPath)));
+
+       return inserted.first->second.get();
+}
+
+static std::unique_ptr<llvm::Module>
+load_module(llvm::StringRef Identifier)
+{
+       LLVMMemoryBufferRef buf;
+       LLVMModuleRef mod;
+       char path[MAXPGPATH];
+       char *msg;
+
+       snprintf(path, MAXPGPATH,"%s/bitcode/%s", pkglib_path, Identifier.data());
+
+       if (LLVMCreateMemoryBufferWithContentsOfFile(path, &buf, &msg))
+               elog(FATAL, "failed to open bitcode file \"%s\": %s",
+                        path, msg);
+       if (LLVMGetBitcodeModuleInContext2(LLVMGetGlobalContext(), buf, &mod))
+               elog(FATAL, "failed to parse bitcode in file \"%s\"", path);
+
+       /*
+        * Currently there's no use in more detailed debug info for JITed
+        * code. Until that changes, not much point in wasting memory and cycles
+        * on processing debuginfo.
+        */
+       llvm::StripDebugInfo(*llvm::unwrap(mod));
+
+       return std::unique_ptr<llvm::Module>(llvm::unwrap(mod));
+}
+
+/*
+ * Compute list of referenced variables, functions and the instruction count
+ * for a function.
+ */
+static void
+function_references(llvm::Function &F,
+                                       int &running_instcount,
+                                       llvm::SmallPtrSet<llvm::GlobalVariable *, 8> &referencedVars,
+                                       llvm::SmallPtrSet<llvm::Function *, 8> &referencedFunctions)
+{
+       llvm::SmallPtrSet<const llvm::User *, 32> Visited;
+
+       for (llvm::BasicBlock &BB : F)
+       {
+               for (llvm::Instruction &I : BB)
+               {
+                       if (llvm::isa<llvm::DbgInfoIntrinsic>(I))
+                               continue;
+
+                       llvm::SmallVector<llvm::User *, 8> Worklist;
+                       Worklist.push_back(&I);
+
+                       running_instcount++;
+
+                       while (!Worklist.empty()) {
+                               llvm::User *U = Worklist.pop_back_val();
+
+                               /* visited before */
+                               if (!Visited.insert(U).second)
+                                       continue;
+
+                               for (auto &OI : U->operands()) {
+                                       llvm::User *Operand = llvm::dyn_cast<llvm::User>(OI);
+                                       if (!Operand)
+                                               continue;
+                                       if (llvm::isa<llvm::BlockAddress>(Operand))
+                                               continue;
+                                       if (auto *GV = llvm::dyn_cast<llvm::GlobalVariable>(Operand)) {
+                                               referencedVars.insert(GV);
+                                               if (GV->hasInitializer())
+                                                       Worklist.push_back(GV->getInitializer());
+                                               continue;
+                                       }
+                                       if (auto *CF = llvm::dyn_cast<llvm::Function>(Operand)) {
+                                               referencedFunctions.insert(CF);
+                                               continue;
+                                       }
+                                       Worklist.push_back(Operand);
+                               }
+                       }
+               }
+       }
+}
+
+/*
+ * Check whether function F is inlinable and, if so, what globals need to be
+ * imported.
+ *
+ * References to external functions from, potentially recursively, inlined
+ * functions are added to the passed in worklist.
+ *
+ * Parameters:
+ *   F                 function to examine; it is materialized here
+ *   threshold         cost budget of F; only used to derive the reduced
+ *                     budget (threshold * inline_cost_decay_factor) handed to
+ *                     functions referenced by F
+ *   functionStates    per-name inlining state of external functions; entries
+ *                     are created or have their cost limit raised here
+ *   worklist          receives {name, searchpath} for external functions that
+ *                     should be (re-)considered for inlining
+ *   searchpath        search path stored alongside names put on the worklist
+ *   visitedFunctions  functions already looked at; prevents repeated and
+ *                     endlessly recursive checks
+ *   running_instcount accumulated cost; increased for F's instructions and by
+ *                     5 for each global variable that has to be imported
+ *   importVars        receives the names of the variables and static
+ *                     functions that have to be imported together with F
+ *
+ * Returns false as soon as anything F depends on prevents inlining. Note that
+ * this function never compares running_instcount against threshold itself;
+ * presumably the caller does that - verify against llvm_build_inline_plan().
+ */
+static bool
+function_inlinable(llvm::Function &F,
+                                  int threshold,
+                                  FunctionInlineStates &functionStates,
+                                  InlineWorkList &worklist,
+                                  InlineSearchPath &searchpath,
+                                  llvm::SmallPtrSet<const llvm::Function *, 8> &visitedFunctions,
+                                  int &running_instcount,
+                                  llvm::StringSet<> &importVars)
+{
+       /* budget granted to functions referenced from F */
+       int subThreshold = threshold * inline_cost_decay_factor;
+       llvm::SmallPtrSet<llvm::GlobalVariable *, 8> referencedVars;
+       llvm::SmallPtrSet<llvm::Function *, 8> referencedFunctions;
+
+       /* can't rely on what may be inlined */
+       if (F.isInterposable())
+               return false;
+
+       /*
+        * Can't rely on function being present. Alternatively we could create a
+        * static version of these functions?
+        */
+       if (F.hasAvailableExternallyLinkage())
+               return false;
+
+       ilog(DEBUG1, "checking inlinability of %s", F.getName().data());
+
+       if (F.materialize())
+               elog(FATAL, "failed to materialize metadata");
+
+       /*
+        * Collect the global variables and functions referenced by F; this also
+        * adds F's instructions to running_instcount.
+        */
+       function_references(F, running_instcount, referencedVars, referencedFunctions);
+
+       /* First decide, for each referenced variable, whether it needs importing */
+       for (llvm::GlobalVariable* rv: referencedVars)
+       {
+               if (rv->materialize())
+                       elog(FATAL, "failed to materialize metadata");
+
+               /*
+                * Never want to inline externally visible vars, cheap enough to
+                * reference.
+                */
+               if (rv->hasExternalLinkage() || rv->hasAvailableExternallyLinkage())
+                       continue;
+
+               /*
+                * If variable is file-local, we need to inline it, to be able to
+                * inline the function itself. Can't do that if the variable can be
+                * modified, because they'd obviously get out of sync.
+                *
+                * XXX: Currently not a problem, but there'd be problems with
+                * nontrivial initializers if they were allowed for postgres.
+                */
+               if (!rv->isConstant())
+               {
+                       ilog(DEBUG1, "cannot inline %s due to uncloneable variable %s",
+                                F.getName().data(), rv->getName().data());
+                       return false;
+               }
+
+               ilog(DEBUG1, "memorizing global var %s linkage %d for inlining",
+                        rv->getName().data(), (int)rv->getLinkage());
+
+               importVars.insert(rv->getName());
+               /* small cost attributed to each cloned global */
+               running_instcount += 5;
+       }
+
+       /*
+        * Mark F itself as visited before looking at its callees, so that a
+        * reference from F (or one of its callees) back to F is skipped below
+        * instead of recursing forever.
+        */
+       visitedFunctions.insert(&F);
+
+       /*
+        * Check referenced functions. Check whether used static ones are
+        * inlinable, and remember external ones for inlining.
+        */
+       for (llvm::Function* referencedFunction: referencedFunctions)
+       {
+               /* import requirements of this one referenced function */
+               llvm::StringSet<> recImportVars;
+
+               if (referencedFunction->materialize())
+                       elog(FATAL, "failed to materialize metadata");
+
+               /* intrinsics are neither imported nor checked */
+               if (referencedFunction->isIntrinsic())
+                       continue;
+
+               /* if already visited skip, otherwise remember */
+               if (!visitedFunctions.insert(referencedFunction).second)
+                       continue;
+
+               /*
+                * We don't inline external functions directly here, instead we put
+                * them on the worklist if appropriate and check them from
+                * llvm_build_inline_plan().
+                */
+               if (referencedFunction->hasExternalLinkage())
+               {
+                       llvm::StringRef funcName = referencedFunction->getName();
+
+                       /*
+                        * Don't bother checking for inlining if remaining cost budget is
+                        * very small.
+                        */
+                       if (subThreshold < 5)
+                               continue;
+
+                       auto it = functionStates.find(funcName);
+                       if (it == functionStates.end())
+                       {
+                               /* first time this function is seen: queue it */
+                               FunctionInlineState inlineState;
+
+                               inlineState.costLimit = subThreshold;
+                               inlineState.processed = false;
+                               inlineState.inlined = false;
+                               inlineState.allowReconsidering = false;
+
+                               functionStates[funcName] = inlineState;
+                               worklist.push_back({funcName, searchpath});
+
+                               ilog(DEBUG1,
+                                        "considering extern function %s at %d for inlining",
+                                        funcName.data(), subThreshold);
+                       }
+                       else if (!it->second.inlined &&
+                                        (!it->second.processed || it->second.allowReconsidering) &&
+                                        it->second.costLimit < subThreshold)
+                       {
+                               /*
+                                * Update inlining threshold if higher. Need to re-queue
+                                * to be processed if already processed with lower
+                                * threshold.
+                                */
+                               if (it->second.processed)
+                               {
+                                       ilog(DEBUG1,
+                                                "reconsidering extern function %s at %d for inlining, increasing from %d",
+                                                funcName.data(), subThreshold, it->second.costLimit);
+
+                                       it->second.processed = false;
+                                       it->second.allowReconsidering = false;
+                                       worklist.push_back({funcName, searchpath});
+                               }
+                               it->second.costLimit = subThreshold;
+                       }
+                       /* external functions never make F itself uninlinable */
+                       continue;
+               }
+
+               /* can't rely on what may be inlined */
+               if (referencedFunction->isInterposable())
+                       return false;
+
+               /*
+                * Non-external function: it has to be inlinable itself, checked
+                * recursively with the reduced budget, for F to be inlinable.
+                */
+               if (!function_inlinable(*referencedFunction,
+                                                               subThreshold,
+                                                               functionStates,
+                                                               worklist,
+                                                               searchpath,
+                                                               visitedFunctions,
+                                                               running_instcount,
+                                                               recImportVars))
+               {
+                       ilog(DEBUG1,
+                                "cannot inline %s due to required function %s not being inlinable",
+                                F.getName().data(), referencedFunction->getName().data());
+                       return false;
+               }
+
+               /* import referenced function itself */
+               importVars.insert(referencedFunction->getName());
+
+               /* import referenced function and its dependants */
+               for (auto& recImportVar : recImportVars)
+                       importVars.insert(recImportVar.first());
+       }
+
+       return true;
+}
+
+/*
+ * Attempt to load the module summary index located at path.
+ *
+ * Returns an empty pointer if the file cannot be opened; that case is only
+ * logged via ilog(DEBUG1). A file that can be opened but cannot be parsed as a
+ * summary index is reported with elog(FATAL) instead.
+ */
+static std::unique_ptr<llvm::ModuleSummaryIndex>
+llvm_load_summary(llvm::StringRef path)
+{
+       /*
+        * StringRef::data() is not guaranteed to be NUL-terminated, so keep an
+        * owned copy around for the printf-style logging calls below.
+        */
+       std::string pathstr = path.str();
+       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer> > MBOrErr =
+               llvm::MemoryBuffer::getFile(path);
+
+       if (std::error_code EC = MBOrErr.getError())
+       {
+               ilog(DEBUG1, "failed to open %s: %s", pathstr.c_str(),
+                        EC.message().c_str());
+       }
+       else
+       {
+               llvm::MemoryBufferRef ref(*MBOrErr.get().get());
+
+               /* the summary loading API differs between LLVM 3.x and later */
+#if LLVM_VERSION_MAJOR > 3
+               llvm::Expected<std::unique_ptr<llvm::ModuleSummaryIndex> > IndexOrErr =
+                       llvm::getModuleSummaryIndex(ref);
+               if (IndexOrErr)
+                       return std::move(IndexOrErr.get());
+               elog(FATAL, "failed to load summary \"%s\": %s",
+                        pathstr.c_str(),
+                        toString(IndexOrErr.takeError()).c_str());
+#else
+               llvm::ErrorOr<std::unique_ptr<llvm::ModuleSummaryIndex> > IndexOrErr =
+                       llvm::getModuleSummaryIndex(ref, [](const llvm::DiagnosticInfo &) {});
+               if (IndexOrErr)
+                       return std::move(IndexOrErr.get());
+               elog(FATAL, "failed to load summary \"%s\": %s",
+                        pathstr.c_str(),
+                        IndexOrErr.getError().message().c_str());
+#endif
+       }
+       return nullptr;
+}
+
+/*
+ * Append the summary index belonging to modpath to searchpath, if there is
+ * one. Loaded summaries are kept in summary_cache, keyed by modpath, so each
+ * index file is read at most once.
+ */
+static void
+add_module_to_inline_search_path(InlineSearchPath& searchpath, llvm::StringRef modpath)
+{
+       /* for now only extensions living in libdir are inlining candidates */
+       if (!modpath.startswith("$libdir/"))
+               return;
+
+       auto cached = summary_cache->find(modpath);
+
+       /* nothing cached yet: derive the index file name and try to load it */
+       if (cached == summary_cache->end())
+       {
+               std::string indexpath(modpath);
+
+               indexpath.replace(0, strlen("$libdir"),
+                                                 std::string(pkglib_path) + "/bitcode");
+               indexpath += ".index.bc";
+
+               (*summary_cache)[modpath] = llvm_load_summary(indexpath);
+               cached = summary_cache->find(modpath);
+       }
+
+       Assert(cached != summary_cache->end());
+
+       /* a NULL entry means no usable summary; only valid ones are added */
+       if (!cached->second)
+               return;
+
+       searchpath.push_back(cached->second.get());
+}
+
+/*
+ * Collect the summaries of all functions hashing to guid that are known to
+ * the indexes in the search path. The result keeps search path order.
+ */
+static llvm::SmallVector<llvm::GlobalValueSummary *, 1>
+summaries_for_guid(const InlineSearchPath& path, llvm::GlobalValue::GUID guid)
+{
+       llvm::SmallVector<llvm::GlobalValueSummary *, 1> found;
+
+       for (auto summaryIndex : path)
+       {
+#if LLVM_VERSION_MAJOR > 4
+               llvm::ValueInfo vi = summaryIndex->getValueInfo(guid);
+
+               /* an index that doesn't know the function has no body for it */
+               if (!vi)
+                       continue;
+
+               for (auto &summary : vi.getSummaryList())
+                       found.push_back(summary.get());
+#else
+               const llvm::const_gvsummary_iterator &entry =
+                       summaryIndex->findGlobalValueSummaryList(guid);
+
+               /* guid unknown to this index, try the next one */
+               if (entry == summaryIndex->end())
+                       continue;
+
+               for (auto &summary : entry->second)
+                       found.push_back(summary.get());
+#endif
+       }
+
+       return found;
+}
+
+/*
+ * Create inline wrapper with the name Name, redirecting the call to F.
+ *
+ * The wrapper is created in importMod with F's function type and
+ * available_externally linkage. All of the wrapper's formal arguments are
+ * forwarded to F, the forwarding call is marked always-inline, and F's result
+ * (if it has one) is returned. Returns the newly created wrapper function.
+ *
+ * NOTE(review): for a variadic F only the fixed parameters are forwarded.
+ */
+static llvm::Function*
+create_redirection_function(std::unique_ptr<llvm::Module> &importMod,
+                                                       llvm::Function *F,
+                                                       llvm::StringRef Name)
+{
+       typedef llvm::GlobalValue::LinkageTypes LinkageTypes;
+
+       llvm::LLVMContext &Context = F->getContext();
+       llvm::IRBuilder<> Builder(Context);
+       llvm::SmallVector<llvm::Value *, 4> fwdargs;
+       llvm::Function *AF;
+       llvm::BasicBlock *BB;
+       llvm::CallInst *fwdcall;
+       llvm::Attribute inlineAttribute;
+
+       AF = llvm::Function::Create(F->getFunctionType(),
+                                                               LinkageTypes::AvailableExternallyLinkage,
+                                                               Name, importMod.get());
+       BB = llvm::BasicBlock::Create(Context, "entry", AF);
+
+       Builder.SetInsertPoint(BB);
+
+       /*
+        * Pass every formal argument of the wrapper on to F. Forwarding just the
+        * first one would be invalid for functions taking no or several
+        * arguments.
+        */
+       for (llvm::Argument &arg : AF->args())
+               fwdargs.push_back(&arg);
+       fwdcall = Builder.CreateCall(F, fwdargs);
+
+       /* ~0U is the attribute index referring to the function/call itself */
+       inlineAttribute = llvm::Attribute::get(Context,
+                                                                                  llvm::Attribute::AlwaysInline);
+       fwdcall->addAttribute(~0U, inlineAttribute);
+
+       /* a void call has no value that could be returned */
+       if (F->getReturnType()->isVoidTy())
+               Builder.CreateRetVoid();
+       else
+               Builder.CreateRet(fwdcall);
+
+       return AF;
+}
index 52c21e687056bd38be6fb855279391f2e2e2ed6a..a19f5d0c02a1bb8e880c3b9f365902b5adb5ecc9 100644 (file)
@@ -544,6 +544,9 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
                if (jit_optimize_above_cost >= 0 &&
                        top_plan->total_cost > jit_optimize_above_cost)
                        result->jitFlags |= PGJIT_OPT3;
+               if (jit_inline_above_cost >= 0 &&
+                       top_plan->total_cost > jit_inline_above_cost)
+                       result->jitFlags |= PGJIT_INLINE;
 
                /*
                 * Decide which operations should be JITed.
index d075cb139a3cc3cdcbc4d107ff710e12ee65b689..4ffc8451ca4885ff2ff02c5510b1daf1ed0a130e 100644 (file)
@@ -3117,6 +3117,16 @@ static struct config_real ConfigureNamesReal[] =
                NULL, NULL, NULL
        },
 
+       {
+               {"jit_inline_above_cost", PGC_USERSET, QUERY_TUNING_COST,
+                       gettext_noop("Perform JIT inlining if query is more expensive."),
+                       gettext_noop("-1 disables inlining.")
+               },
+               &jit_inline_above_cost,
+               500000, -1, DBL_MAX,
+               NULL, NULL, NULL
+       },
+
        {
                {"cursor_tuple_fraction", PGC_USERSET, QUERY_TUNING_OTHER,
                        gettext_noop("Sets the planner's estimate of the fraction of "
index 4b692dc3e5d1d992803f2166d7bc78816eef2d73..66d09388278fd70044cf5ca0158131317754411f 100644 (file)
                                        # and query more expensive, -1 disables
 #jit_optimize_above_cost = 500000      # optimize JITed functions if query is
                                        # more expensive, -1 disables
+#jit_inline_above_cost = 500000                # attempt to inline operators and
+                                       # functions if query is more expensive,
+                                       # -1 disables
 
 #min_parallel_table_scan_size = 8MB
 #min_parallel_index_scan_size = 512kB
index efcd6a52cfd8db60ecab29d18e3575c68af2e321..85d234ff3b861255dd17cdf4320f424486e1b629 100644 (file)
@@ -19,7 +19,7 @@
 #define PGJIT_NONE     0
 #define PGJIT_PERFORM  1 << 0
 #define PGJIT_OPT3     1 << 1
-/* reserved for PGJIT_INLINE */
+#define PGJIT_INLINE   (1 << 2)        /* parenthesized so it composes safely in expressions */
 #define PGJIT_EXPR        1 << 3
 #define PGJIT_DEFORM   1 << 4
 
@@ -37,6 +37,9 @@ typedef struct JitContext
        /* accumulated time to generate code */
        instr_time      generation_counter;
 
+       /* accumulated time for inlining */
+       instr_time      inlining_counter;
+
        /* accumulated time for optimization */
        instr_time      optimization_counter;
 
@@ -70,6 +73,7 @@ extern bool jit_expressions;
 extern bool jit_profiling_support;
 extern bool jit_tuple_deforming;
 extern double jit_above_cost;
+extern double jit_inline_above_cost;
 extern double jit_optimize_above_cost;
 
 
index f6aed64d8d5e4e8c6b38c057a740237d5b21576e..dc8fa57f840bd852bb600b4fa6ff624c81afccf0 100644 (file)
@@ -103,6 +103,7 @@ extern LLVMValueRef llvm_function_reference(LLVMJitContext *context,
                                                LLVMModuleRef mod,
                                                FunctionCallInfo fcinfo);
 
+extern void llvm_inline(LLVMModuleRef mod);
 
 /*
  ****************************************************************************
index c27004ecfbe69d47689eb997f9ca1bcf1322cbfe..5beb1e6b3c34fa65c9fe4a3bad4b228cc45c0c83 100644 (file)
@@ -101,6 +101,10 @@ endif
 
 all: $(PROGRAM) $(DATA_built) $(SCRIPTS_built) $(addsuffix $(DLSUFFIX), $(MODULES)) $(addsuffix .control, $(EXTENSION))
 
+ifeq ($(with_llvm), yes)
+# with LLVM enabled, also build a .bc bitcode file per module and per object
+all: $(MODULES:%=%.bc) $(OBJS:.o=.bc)
+endif
+
 ifdef MODULE_big
 # shared library parameters
 NAME = $(MODULE_big)
@@ -123,6 +127,9 @@ ifneq (,$(DATA_TSEARCH))
 endif # DATA_TSEARCH
 ifdef MODULES
        $(INSTALL_SHLIB) $(addsuffix $(DLSUFFIX), $(MODULES)) '$(DESTDIR)$(pkglibdir)/'
+ifeq ($(with_llvm), yes)
+       $(foreach mod, $(MODULES), $(call install_llvm_module,$(mod),$(mod).bc))
+endif # with_llvm
 endif # MODULES
 ifdef DOCS
 ifdef docdir
@@ -138,8 +145,11 @@ endif # SCRIPTS
 ifdef SCRIPTS_built
        $(INSTALL_SCRIPT) $(SCRIPTS_built) '$(DESTDIR)$(bindir)/'
 endif # SCRIPTS_built
-
 ifdef MODULE_big
+ifeq ($(with_llvm), yes)
+       $(call install_llvm_module,$(MODULE_big),$(OBJS))
+endif # with_llvm
+
 install: install-lib
 endif # MODULE_big
 
@@ -183,7 +193,10 @@ ifneq (,$(DATA_TSEARCH))
 endif
 ifdef MODULES
        rm -f $(addprefix '$(DESTDIR)$(pkglibdir)'/, $(addsuffix $(DLSUFFIX), $(MODULES)))
-endif
+ifeq ($(with_llvm), yes)
+       $(foreach mod, $(MODULES), $(call uninstall_llvm_module,$(mod)))
+endif # with_llvm
+endif # MODULES
 ifdef DOCS
        rm -f $(addprefix '$(DESTDIR)$(docdir)/$(docmoduledir)'/, $(DOCS))
 endif
@@ -198,13 +211,18 @@ ifdef SCRIPTS_built
 endif
 
 ifdef MODULE_big
+ifeq ($(with_llvm), yes)
+       $(call uninstall_llvm_module,$(MODULE_big))
+endif # with_llvm
+
 uninstall: uninstall-lib
 endif # MODULE_big
 
 
 clean:
 ifdef MODULES
-       rm -f $(addsuffix $(DLSUFFIX), $(MODULES)) $(addsuffix .o, $(MODULES)) $(if $(PGFILEDESC),$(WIN32RES))
+       rm -f $(addsuffix $(DLSUFFIX), $(MODULES)) $(addsuffix .o, $(MODULES)) $(if $(PGFILEDESC),$(WIN32RES)) \
+           $(addsuffix .bc, $(MODULES))
 endif
 ifdef DATA_built
        rm -f $(DATA_built)
@@ -216,7 +234,7 @@ ifdef PROGRAM
        rm -f $(PROGRAM)$(X)
 endif
 ifdef OBJS
-       rm -f $(OBJS)
+       rm -f $(OBJS) $(patsubst %.o,%.bc, $(OBJS))
 endif
 ifdef EXTRA_CLEAN
        rm -rf $(EXTRA_CLEAN)