From 9370462e9a79755aea367c62eb0fef96f0c42258 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 28 Mar 2018 13:19:08 -0700 Subject: [PATCH] Add inlining support to LLVM JIT provider. This provides infrastructure to allow JITed code to inline code implemented in C. This e.g. can be postgres internal functions or extension code. This already speeds up long running queries, by allowing the LLVM optimizer to optimize across function boundaries. The optimization currently doesn't reach its full potential because LLVM cannot optimize the FunctionCallInfoData argument fully away, because it's allocated on the heap rather than the stack. Fixing that is beyond what's realistic for v11. To be able to do that, use CLANG to convert C code to LLVM bitcode, and have LLVM build a summary for it. That bitcode can then be used to inline functions at runtime. For that the bitcode needs to be installed. Postgres bitcode goes into $pkglibdir/bitcode/postgres, extensions go into equivalent directories. PGXS has been modified so that happens automatically if postgres has been compiled with LLVM support. Currently this isn't the fastest inline implementation, modules are reloaded from disk during inlining. That's to work around an apparent LLVM bug, triggering an apparently spurious error in LLVM assertion enabled builds. Once that is resolved we can remove the superfluous read from disk. Docs will follow in a later commit containing docs for the whole JIT feature. 
Author: Andres Freund Discussion: https://postgr.es/m/20170901064131.tazjxwus3k2w3ybh@alap3.anarazel.de --- src/Makefile.global.in | 34 + src/backend/Makefile | 10 + src/backend/common.mk | 6 +- src/backend/jit/jit.c | 1 + src/backend/jit/llvm/Makefile | 2 +- src/backend/jit/llvm/llvmjit.c | 17 +- src/backend/jit/llvm/llvmjit_inline.cpp | 877 ++++++++++++++++++ src/backend/optimizer/plan/planner.c | 3 + src/backend/utils/misc/guc.c | 10 + src/backend/utils/misc/postgresql.conf.sample | 3 + src/include/jit/jit.h | 6 +- src/include/jit/llvmjit.h | 1 + src/makefiles/pgxs.mk | 26 +- 13 files changed, 988 insertions(+), 8 deletions(-) create mode 100644 src/backend/jit/llvm/llvmjit_inline.cpp diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 859adfc3cb..04cace1017 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -171,6 +171,7 @@ endif # PGXS includedir_server = $(pkgincludedir)/server includedir_internal = $(pkgincludedir)/internal pgxsdir = $(pkglibdir)/pgxs +bitcodedir = $(pkglibdir)/bitcode ########################################################################## @@ -972,3 +973,36 @@ endif %.bc : %.cpp $(COMPILE.cxx.bc) -o $@ $< + +# Install LLVM bitcode module (for JITing). +# +# The arguments are: +# $(1) name of the module (e.g. an extension's name or postgres for core code) +# $(2) source objects, with .o suffix +# +define install_llvm_module +# Create target directory +$(MKDIR_P) "$(DESTDIR)${bitcodedir}/$(1)" +# Create sub-directories, if files are in subdirectories +$(MKDIR_P) $(sort $(dir $(addprefix $(DESTDIR)${bitcodedir}/$(1)/, $(2)))) +# Then install files +# +# The many INSTALL_DATA invocations aren't particularly fast, it'd be +# good if we could coalesce them, but I didn't find a good way. 
+$(foreach obj, ${2}, $(INSTALL_DATA) $(patsubst %.o,%.bc, $(obj)) $(DESTDIR)/${bitcodedir}/$(1)/$(dir $(obj)); +) +# and generate index +(cd "$(DESTDIR)${bitcodedir}" && $(LLVM_BINPATH)/llvm-lto -thinlto -thinlto-action=thinlink -o $(1).index.bc $(addprefix $(1)/,$(patsubst %.o,%.bc, $(2)))) +endef + +# Uninstall LLVM bitcode module. +# +# The arguments are: +# $(1) name of the module (e.g. an extension's name or postgres for core code) +# +# This intentionally doesn't use the explicit installed file list, +# seems too likely to change regularly. +define uninstall_llvm_module +rm -rf "$(DESTDIR)${bitcodedir}/$(1)/" +rm -f "$(DESTDIR)${bitcodedir}/$(1).index.bc" +endef diff --git a/src/backend/Makefile b/src/backend/Makefile index ca230de2f3..21b094385f 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -252,6 +252,13 @@ endif $(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample' $(INSTALL_DATA) $(srcdir)/access/transam/recovery.conf.sample '$(DESTDIR)$(datadir)/recovery.conf.sample' +ifeq ($(with_llvm), yes) +install-bin: install-postgres-bitcode + +install-postgres-bitcode: $(OBJS) all + $(call install_llvm_module,postgres,$(call expand_subsys, $(filter-out $(top_builddir)/src/timezone/objfiles.txt, $(SUBDIROBJS)))) +endif + install-bin: postgres $(POSTGRES_IMP) installdirs $(INSTALL_PROGRAM) postgres$(X) '$(DESTDIR)$(bindir)/postgres$(X)' ifneq ($(PORTNAME), win32) @@ -309,6 +316,9 @@ endif '$(DESTDIR)$(datadir)/pg_ident.conf.sample' \ '$(DESTDIR)$(datadir)/postgresql.conf.sample' \ '$(DESTDIR)$(datadir)/recovery.conf.sample' +ifeq ($(with_llvm), yes) + $(call uninstall_llvm_module,postgres) +endif ########################################################################## diff --git a/src/backend/common.mk b/src/backend/common.mk index 6eaa353aea..08e7eff6c8 100644 --- a/src/backend/common.mk +++ b/src/backend/common.mk @@ -30,6 +30,10 @@ objfiles.txt: Makefile $(SUBDIROBJS) $(OBJS) # Don't rebuild 
the list if only the OBJS have changed. $(if $(filter-out $(OBJS),$?),( $(if $(SUBDIROBJS),cat $(SUBDIROBJS); )echo $(addprefix $(subdir)/,$(OBJS)) ) >$@,touch $@) +ifeq ($(with_llvm), yes) +objfiles.txt: $(patsubst %.o,%.bc, $(OBJS)) +endif + # make function to expand objfiles.txt contents expand_subsys = $(foreach file,$(1),$(if $(filter %/objfiles.txt,$(file)),$(patsubst ../../src/backend/%,%,$(addprefix $(top_builddir)/,$(shell cat $(file)))),$(file))) @@ -43,7 +47,7 @@ $(SUBDIRS:%=%-recursive): $(call recurse,clean) clean: clean-local clean-local: - rm -f $(subsysfilename) $(OBJS) + rm -f $(subsysfilename) $(OBJS) $(patsubst %.o,%.bc, $(OBJS)) $(call recurse,coverage) $(call recurse,install) diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c index 67a015fb35..c1703094db 100644 --- a/src/backend/jit/jit.c +++ b/src/backend/jit/jit.c @@ -40,6 +40,7 @@ bool jit_expressions = true; bool jit_profiling_support = false; bool jit_tuple_deforming = true; double jit_above_cost = 100000; +double jit_inline_above_cost = 500000; double jit_optimize_above_cost = 500000; static JitProviderCallbacks provider; diff --git a/src/backend/jit/llvm/Makefile b/src/backend/jit/llvm/Makefile index d6a1f5f02d..d7a36d7371 100644 --- a/src/backend/jit/llvm/Makefile +++ b/src/backend/jit/llvm/Makefile @@ -37,7 +37,7 @@ override COMPILER = $(CXX) $(CFLAGS) OBJS=$(WIN32RES) # Infrastructure -OBJS += llvmjit.o llvmjit_error.o llvmjit_wrap.o +OBJS += llvmjit.o llvmjit_error.o llvmjit_inline.o llvmjit_wrap.o # Code generation OBJS += llvmjit_expr.o llvmjit_deform.o diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c index 5a33e52e1d..daae964b1c 100644 --- a/src/backend/jit/llvm/llvmjit.c +++ b/src/backend/jit/llvm/llvmjit.c @@ -468,6 +468,10 @@ llvm_optimize_module(LLVMJitContext *context, LLVMModuleRef module) /* always use always-inliner pass */ if (!(context->base.flags & PGJIT_OPT3)) LLVMAddAlwaysInlinerPass(llvm_mpm); + /* if doing inlining, but no 
expensive optimization, add inlining pass */ + if (context->base.flags & PGJIT_INLINE + && !(context->base.flags & PGJIT_OPT3)) + LLVMAddFunctionInliningPass(llvm_mpm); LLVMRunPassManager(llvm_mpm, context->module); LLVMDisposePassManager(llvm_mpm); @@ -491,6 +495,16 @@ llvm_compile_module(LLVMJitContext *context) else compile_orc = llvm_opt0_orc; + /* perform inlining */ + if (context->base.flags & PGJIT_INLINE) + { + INSTR_TIME_SET_CURRENT(starttime); + llvm_inline(context->module); + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(context->base.inlining_counter, + endtime, starttime); + } + if (jit_dump_bitcode) { char *filename; @@ -578,7 +592,8 @@ llvm_compile_module(LLVMJitContext *context) MemoryContextSwitchTo(oldcontext); ereport(DEBUG1, - (errmsg("time to opt: %.3fs, emit: %.3fs", + (errmsg("time to inline: %.3fs, opt: %.3fs, emit: %.3fs", + INSTR_TIME_GET_DOUBLE(context->base.inlining_counter), INSTR_TIME_GET_DOUBLE(context->base.optimization_counter), INSTR_TIME_GET_DOUBLE(context->base.emission_counter)), errhidestmt(true), diff --git a/src/backend/jit/llvm/llvmjit_inline.cpp b/src/backend/jit/llvm/llvmjit_inline.cpp new file mode 100644 index 0000000000..130e2ab415 --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_inline.cpp @@ -0,0 +1,877 @@ +/*------------------------------------------------------------------------- + * + * llvmjit_inline.cpp + * Cross module inlining suitable for postgres' JIT + * + * The inliner iterates over external functions referenced from the passed + * module and attempts to inline those. It does so by utilizing pre-built + * indexes over both postgres core code and extension modules. When a match + * for an external function is found - not guaranteed! - the index will then + * be used to judge their instruction count / inline worthiness. After doing + * so for all external functions, all the referenced functions (and + * prerequisites) will be imported. 
+ * + * Copyright (c) 2016-2018, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_inline.cpp + * + *------------------------------------------------------------------------- + */ + +extern "C" +{ +#include "postgres.h" +} + +#include "jit/llvmjit.h" + +extern "C" +{ +#include +#include +#include +#include +#include + +#include "common/string.h" +#include "miscadmin.h" +#include "storage/fd.h" +} + +#include +#include + +#include +#include +#include +#include +#if LLVM_VERSION_MAJOR > 3 +#include +#else +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Type used to represent modules InlineWorkListItem's subject is searched for + * in. + */ +typedef llvm::SmallVector InlineSearchPath; + +/* + * Item in queue of to-be-checked symbols and corresponding queue. + */ +typedef struct InlineWorkListItem +{ + llvm::StringRef symbolName; + llvm::SmallVector searchpath; +} InlineWorkListItem; +typedef llvm::SmallVector InlineWorkList; + +/* + * Information about symbols processed during inlining. Used to prevent + * repeated searches and provide additional information. + */ +typedef struct FunctionInlineState +{ + int costLimit; + bool processed; + bool inlined; + bool allowReconsidering; +} FunctionInlineState; +typedef llvm::StringMap FunctionInlineStates; + +/* + * Map of modules that should be inlined, with a list of the to-be inlined + * symbols. + */ +typedef llvm::StringMap > ImportMapTy; + + +const float inline_cost_decay_factor = 0.5; +const int inline_initial_cost = 150; + +/* + * These are managed statics so LLVM knows to deallocate them during an + * LLVMShutdown(), rather than after (which'd cause crashes). 
+ */ +typedef llvm::StringMap > ModuleCache; +llvm::ManagedStatic module_cache; +typedef llvm::StringMap > SummaryCache; +llvm::ManagedStatic summary_cache; + + +static std::unique_ptr llvm_build_inline_plan(llvm::Module *mod); +static void llvm_execute_inline_plan(llvm::Module *mod, + ImportMapTy *globalsToInline); + +static llvm::Module* load_module_cached(llvm::StringRef modPath); +static std::unique_ptr load_module(llvm::StringRef Identifier); +static std::unique_ptr llvm_load_summary(llvm::StringRef path); + + +static llvm::Function* create_redirection_function(std::unique_ptr &importMod, + llvm::Function *F, + llvm::StringRef Name); + +static bool function_inlinable(llvm::Function &F, + int threshold, + FunctionInlineStates &functionState, + InlineWorkList &worklist, + InlineSearchPath &searchpath, + llvm::SmallPtrSet &visitedFunctions, + int &running_instcount, + llvm::StringSet<> &importVars); +static void function_references(llvm::Function &F, + int &running_instcount, + llvm::SmallPtrSet &referencedVars, + llvm::SmallPtrSet &referencedFunctions); + +static void add_module_to_inline_search_path(InlineSearchPath& path, llvm::StringRef modpath); +static llvm::SmallVector +summaries_for_guid(const InlineSearchPath& path, llvm::GlobalValue::GUID guid); + +/* verbose debugging for inliner development */ +/* #define INLINE_DEBUG */ +#ifdef INLINE_DEBUG +#define ilog elog +#else +#define ilog(...) (void) 0 +#endif + +/* + * Perform inlining of external function references in M based on a simple + * cost based analysis. + */ +void +llvm_inline(LLVMModuleRef M) +{ + llvm::Module *mod = llvm::unwrap(M); + + std::unique_ptr globalsToInline = llvm_build_inline_plan(mod); + if (!globalsToInline) + return; + llvm_execute_inline_plan(mod, globalsToInline.get()); +} + +/* + * Build information necessary for inlining external function references in + * mod. 
+ */ +static std::unique_ptr +llvm_build_inline_plan(llvm::Module *mod) +{ + std::unique_ptr globalsToInline = llvm::make_unique(); + FunctionInlineStates functionStates; + InlineWorkList worklist; + + InlineSearchPath defaultSearchPath; + + /* attempt to add module to search path */ + add_module_to_inline_search_path(defaultSearchPath, "$libdir/postgres"); + /* if postgres isn't available, no point continuing */ + if (defaultSearchPath.empty()) + return nullptr; + + /* + * Start inlining with current references to external functions by putting + * them on the inlining worklist. If, during inlining of those, new extern + * functions need to be inlined, they'll also be put there, with a lower + * priority. + */ + for (const llvm::Function &funcDecl : mod->functions()) + { + InlineWorkListItem item = {}; + FunctionInlineState inlineState = {}; + + /* already has a definition */ + if (!funcDecl.isDeclaration()) + continue; + + /* llvm provides implementation */ + if (funcDecl.isIntrinsic()) + continue; + + item.symbolName = funcDecl.getName(); + item.searchpath = defaultSearchPath; + worklist.push_back(item); + inlineState.costLimit = inline_initial_cost; + inlineState.processed = false; + inlineState.inlined = false; + inlineState.allowReconsidering = false; + functionStates[funcDecl.getName()] = inlineState; + } + + /* + * Iterate over pending worklist items, look them up in index, check + * whether they should be inlined. 
+ */ + while (!worklist.empty()) + { + InlineWorkListItem item = worklist.pop_back_val(); + llvm::StringRef symbolName = item.symbolName; + char *cmodname; + char *cfuncname; + FunctionInlineState &inlineState = functionStates[symbolName]; + llvm::GlobalValue::GUID funcGUID; + + llvm_split_symbol_name(symbolName.data(), &cmodname, &cfuncname); + + funcGUID = llvm::GlobalValue::getGUID(cfuncname); + + /* already processed */ + if (inlineState.processed) + continue; + + + if (cmodname) + add_module_to_inline_search_path(item.searchpath, cmodname); + + /* + * Iterate over all known definitions of function, via the index. Then + * look up module(s), check if function actually is defined (there + * could be hash conflicts). + */ + for (const auto &gvs : summaries_for_guid(item.searchpath, funcGUID)) + { + const llvm::FunctionSummary *fs; + llvm::StringRef modPath = gvs->modulePath(); + llvm::Module *defMod; + llvm::Function *funcDef; + + fs = llvm::cast(gvs); + +#if LLVM_VERSION_MAJOR > 3 + if (gvs->notEligibleToImport()) + { + ilog(DEBUG1, "ineligibile to import %s due to summary", + symbolName.data()); + continue; + } +#endif + + if ((int) fs->instCount() > inlineState.costLimit) + { + ilog(DEBUG1, "ineligibile to import %s due to early threshold: %u vs %u", + symbolName.data(), fs->instCount(), inlineState.costLimit); + inlineState.allowReconsidering = true; + continue; + } + + defMod = load_module_cached(modPath); + if (defMod->materializeMetadata()) + elog(FATAL, "failed to materialize metadata"); + + funcDef = defMod->getFunction(cfuncname); + + /* + * This can happen e.g. in case of a hash collision of the + * function's name. 
+ */ + if (!funcDef) + continue; + + if (funcDef->materialize()) + elog(FATAL, "failed to materialize metadata"); + + Assert(!funcDef->isDeclaration()); + Assert(funcDef->hasExternalLinkage()); + + /* don't inline functions marked as noinline */ + if (funcDef->getAttributes().hasFnAttribute(llvm::Attribute::NoInline)) + { + ilog(DEBUG1, "ineligibile to import %s due to noinline", + symbolName.data()); + continue; + } + + llvm::StringSet<> importVars; + llvm::SmallPtrSet visitedFunctions; + int running_instcount = 0; + + /* + * Check whether function, and objects it depends on, are + * inlinable. + */ + if (function_inlinable(*funcDef, + inlineState.costLimit, + functionStates, + worklist, + item.searchpath, + visitedFunctions, + running_instcount, + importVars)) + { + /* + * Check whether function and all its dependencies are too + * big. Dependencies already counted for other functions that + * will get inlined are not counted again. While this make + * things somewhat order dependant, I can't quite see a point + * in a different behaviour. + */ + if (running_instcount > inlineState.costLimit) + { + ilog(DEBUG1, "skipping inlining of %s due to late threshold %d vs %d", + symbolName.data(), running_instcount, inlineState.costLimit); + inlineState.allowReconsidering = true; + continue; + } + + ilog(DEBUG1, "inline top function %s total_instcount: %d, partial: %d", + symbolName.data(), running_instcount, fs->instCount()); + + /* import referenced function itself */ + importVars.insert(symbolName); + + { + llvm::StringSet<> &modGlobalsToInline = (*globalsToInline)[modPath]; + for (auto& importVar : importVars) + modGlobalsToInline.insert(importVar.first()); + Assert(modGlobalsToInline.size() > 0); + } + + /* mark function as inlined */ + inlineState.inlined = true; + + /* + * Found definition to inline, don't look for further + * potential definitions. 
+ */ + break; + } + else + { + ilog(DEBUG1, "had to skip inlining %s", + symbolName.data()); + + /* It's possible there's another definition that's inlinable. */ + } + } + + /* + * Signal that we're done with symbol, whether successful (inlined = + * true above) or not. + */ + inlineState.processed = true; + } + + return globalsToInline; +} + +/* + * Perform the actual inlining of external functions (and their dependencies) + * into mod. + */ +static void +llvm_execute_inline_plan(llvm::Module *mod, ImportMapTy *globalsToInline) +{ + llvm::IRMover Mover(*mod); + + for (const auto& toInline : *globalsToInline) + { + const llvm::StringRef& modPath = toInline.first(); + const llvm::StringSet<>& modGlobalsToInline = toInline.second; + llvm::SetVector GlobalsToImport; + + Assert(module_cache->count(modPath)); + std::unique_ptr importMod(std::move((*module_cache)[modPath])); + module_cache->erase(modPath); + + if (modGlobalsToInline.empty()) + continue; + + for (auto &glob: modGlobalsToInline) + { + llvm::StringRef SymbolName = glob.first(); + char *modname; + char *funcname; + + llvm_split_symbol_name(SymbolName.data(), &modname, &funcname); + + llvm::GlobalValue *valueToImport = importMod->getNamedValue(funcname); + + if (!valueToImport) + elog(FATAL, "didn't refind value %s to import", SymbolName.data()); + + /* + * For functions (global vars are only inlined if already static), + * mark imported variables as being clones from other + * functions. That a) avoids symbol conflicts b) allows the + * optimizer to perform inlining. + */ + if (llvm::isa(valueToImport)) + { + llvm::Function *F = llvm::dyn_cast(valueToImport); + typedef llvm::GlobalValue::LinkageTypes LinkageTypes; + + /* + * Per-function info isn't necessarily stripped yet, as the + * module is lazy-loaded when stripped above. 
+ */ + llvm::stripDebugInfo(*F); + + /* + * If the to-be-imported function is one referenced including + * its module name, create a tiny inline function that just + * forwards the call. One might think a GlobalAlias would do + * the trick, but a) IRMover doesn't override a declaration + * with an alias pointing to a definition (instead renaming + * it), b) Aliases can't be AvailableExternally. + */ + if (modname) + { + llvm::Function *AF; + + AF = create_redirection_function(importMod, F, SymbolName); + + GlobalsToImport.insert(AF); + llvm::stripDebugInfo(*AF); + } + + if (valueToImport->hasExternalLinkage()) + { + valueToImport->setLinkage(LinkageTypes::AvailableExternallyLinkage); + } + } + + GlobalsToImport.insert(valueToImport); + ilog(DEBUG1, "performing import of %s %s", + modPath.data(), SymbolName.data()); + + } + +#if LLVM_VERSION_MAJOR > 4 +#define IRMOVE_PARAMS , /*IsPerformingImport=*/false +#elif LLVM_VERSION_MAJOR > 3 +#define IRMOVE_PARAMS , /*LinkModuleInlineAsm=*/false, /*IsPerformingImport=*/false +#else +#define IRMOVE_PARAMS +#endif + if (Mover.move(std::move(importMod), GlobalsToImport.getArrayRef(), + [](llvm::GlobalValue &, llvm::IRMover::ValueAdder) {} + IRMOVE_PARAMS)) + elog(FATAL, "function import failed with linker error"); + } +} + +/* + * Return a module identified by modPath, caching it in memory. + * + * Note that such a module may *not* be modified without copying, otherwise + * the cache state would get corrupted. 
+ */ +static llvm::Module* +load_module_cached(llvm::StringRef modPath) +{ + auto it = module_cache->find(modPath); + if (it == module_cache->end()) + { + it = module_cache->insert( + std::make_pair(modPath, load_module(modPath))).first; + } + + return it->second.get(); +} + +static std::unique_ptr +load_module(llvm::StringRef Identifier) +{ + LLVMMemoryBufferRef buf; + LLVMModuleRef mod; + char path[MAXPGPATH]; + char *msg; + + snprintf(path, MAXPGPATH,"%s/bitcode/%s", pkglib_path, Identifier.data()); + + if (LLVMCreateMemoryBufferWithContentsOfFile(path, &buf, &msg)) + elog(FATAL, "failed to open bitcode file \"%s\": %s", + path, msg); + if (LLVMGetBitcodeModuleInContext2(LLVMGetGlobalContext(), buf, &mod)) + elog(FATAL, "failed to parse bitcode in file \"%s\"", path); + + /* + * Currently there's no use in more detailed debug info for JITed + * code. Until that changes, not much point in wasting memory and cycles + * on processing debuginfo. + */ + llvm::StripDebugInfo(*llvm::unwrap(mod)); + + return std::unique_ptr(llvm::unwrap(mod)); +} + +/* + * Compute list of referenced variables, functions and the instruction count + * for a function. 
+ */ +static void +function_references(llvm::Function &F, + int &running_instcount, + llvm::SmallPtrSet &referencedVars, + llvm::SmallPtrSet &referencedFunctions) +{ + llvm::SmallPtrSet Visited; + + for (llvm::BasicBlock &BB : F) + { + for (llvm::Instruction &I : BB) + { + if (llvm::isa(I)) + continue; + + llvm::SmallVector Worklist; + Worklist.push_back(&I); + + running_instcount++; + + while (!Worklist.empty()) { + llvm::User *U = Worklist.pop_back_val(); + + /* visited before */ + if (!Visited.insert(U).second) + continue; + + for (auto &OI : U->operands()) { + llvm::User *Operand = llvm::dyn_cast(OI); + if (!Operand) + continue; + if (llvm::isa(Operand)) + continue; + if (auto *GV = llvm::dyn_cast(Operand)) { + referencedVars.insert(GV); + if (GV->hasInitializer()) + Worklist.push_back(GV->getInitializer()); + continue; + } + if (auto *CF = llvm::dyn_cast(Operand)) { + referencedFunctions.insert(CF); + continue; + } + Worklist.push_back(Operand); + } + } + } + } +} + +/* + * Check whether function F is inlinable and, if so, what globals need to be + * imported. + * + * References to external functions from, potentially recursively, inlined + * functions are added to the passed in worklist. + */ +static bool +function_inlinable(llvm::Function &F, + int threshold, + FunctionInlineStates &functionStates, + InlineWorkList &worklist, + InlineSearchPath &searchpath, + llvm::SmallPtrSet &visitedFunctions, + int &running_instcount, + llvm::StringSet<> &importVars) +{ + int subThreshold = threshold * inline_cost_decay_factor; + llvm::SmallPtrSet referencedVars; + llvm::SmallPtrSet referencedFunctions; + + /* can't rely on what may be inlined */ + if (F.isInterposable()) + return false; + + /* + * Can't rely on function being present. Alternatively we could create a + * static version of these functions? 
+ */ + if (F.hasAvailableExternallyLinkage()) + return false; + + ilog(DEBUG1, "checking inlinability of %s", F.getName().data()); + + if (F.materialize()) + elog(FATAL, "failed to materialize metadata"); + + function_references(F, running_instcount, referencedVars, referencedFunctions); + + for (llvm::GlobalVariable* rv: referencedVars) + { + if (rv->materialize()) + elog(FATAL, "failed to materialize metadata"); + + /* + * Never want to inline externally visible vars, cheap enough to + * reference. + */ + if (rv->hasExternalLinkage() || rv->hasAvailableExternallyLinkage()) + continue; + + /* + * If variable is file-local, we need to inline it, to be able to + * inline the function itself. Can't do that if the variable can be + * modified, because they'd obviously get out of sync. + * + * XXX: Currently not a problem, but there'd be problems with + * nontrivial initializers if they were allowed for postgres. + */ + if (!rv->isConstant()) + { + ilog(DEBUG1, "cannot inline %s due to uncloneable variable %s", + F.getName().data(), rv->getName().data()); + return false; + } + + ilog(DEBUG1, "memorizing global var %s linkage %d for inlining", + rv->getName().data(), (int)rv->getLinkage()); + + importVars.insert(rv->getName()); + /* small cost attributed to each cloned global */ + running_instcount += 5; + } + + visitedFunctions.insert(&F); + + /* + * Check referenced functions. Check whether used static ones are + * inlinable, and remember external ones for inlining. 
+ */ + for (llvm::Function* referencedFunction: referencedFunctions) + { + llvm::StringSet<> recImportVars; + + if (referencedFunction->materialize()) + elog(FATAL, "failed to materialize metadata"); + + if (referencedFunction->isIntrinsic()) + continue; + + /* if already visited skip, otherwise remember */ + if (!visitedFunctions.insert(referencedFunction).second) + continue; + + /* + * We don't inline external functions directly here, instead we put + * them on the worklist if appropriate and check them from + * llvm_build_inline_plan(). + */ + if (referencedFunction->hasExternalLinkage()) + { + llvm::StringRef funcName = referencedFunction->getName(); + + /* + * Don't bother checking for inlining if remaining cost budget is + * very small. + */ + if (subThreshold < 5) + continue; + + auto it = functionStates.find(funcName); + if (it == functionStates.end()) + { + FunctionInlineState inlineState; + + inlineState.costLimit = subThreshold; + inlineState.processed = false; + inlineState.inlined = false; + inlineState.allowReconsidering = false; + + functionStates[funcName] = inlineState; + worklist.push_back({funcName, searchpath}); + + ilog(DEBUG1, + "considering extern function %s at %d for inlining", + funcName.data(), subThreshold); + } + else if (!it->second.inlined && + (!it->second.processed || it->second.allowReconsidering) && + it->second.costLimit < subThreshold) + { + /* + * Update inlining threshold if higher. Need to re-queue + * to be processed if already processed with lower + * threshold. 
+ */ + if (it->second.processed) + { + ilog(DEBUG1, + "reconsidering extern function %s at %d for inlining, increasing from %d", + funcName.data(), subThreshold, it->second.costLimit); + + it->second.processed = false; + it->second.allowReconsidering = false; + worklist.push_back({funcName, searchpath}); + } + it->second.costLimit = subThreshold; + } + continue; + } + + /* can't rely on what may be inlined */ + if (referencedFunction->isInterposable()) + return false; + + if (!function_inlinable(*referencedFunction, + subThreshold, + functionStates, + worklist, + searchpath, + visitedFunctions, + running_instcount, + recImportVars)) + { + ilog(DEBUG1, + "cannot inline %s due to required function %s not being inlinable", + F.getName().data(), referencedFunction->getName().data()); + return false; + } + + /* import referenced function itself */ + importVars.insert(referencedFunction->getName()); + + /* import referenced function and its dependants */ + for (auto& recImportVar : recImportVars) + importVars.insert(recImportVar.first()); + } + + return true; +} + +/* + * Attempt to load module summary located at path. Return empty pointer when + * loading fails. 
+ */ +static std::unique_ptr +llvm_load_summary(llvm::StringRef path) +{ + llvm::ErrorOr > MBOrErr = + llvm::MemoryBuffer::getFile(path); + + if (std::error_code EC = MBOrErr.getError()) + { + ilog(DEBUG1, "failed to open %s: %s", path.data(), + EC.message().c_str()); + } + else + { + llvm::MemoryBufferRef ref(*MBOrErr.get().get()); + +#if LLVM_VERSION_MAJOR > 3 + llvm::Expected > IndexOrErr = + llvm::getModuleSummaryIndex(ref); + if (IndexOrErr) + return std::move(IndexOrErr.get()); + elog(FATAL, "failed to load summary \"%s\": %s", + path.data(), + toString(IndexOrErr.takeError()).c_str()); +#else + llvm::ErrorOr > IndexOrErr = + llvm::getModuleSummaryIndex(ref, [](const llvm::DiagnosticInfo &) {}); + if (IndexOrErr) + return std::move(IndexOrErr.get()); + elog(FATAL, "failed to load summary \"%s\": %s", + path.data(), + IndexOrErr.getError().message().c_str()); +#endif + } + return nullptr; +} + +/* + * Attempt to add modpath to the search path. + */ +static void +add_module_to_inline_search_path(InlineSearchPath& searchpath, llvm::StringRef modpath) +{ + /* only extension in libdir are candidates for inlining for now */ + if (!modpath.startswith("$libdir/")) + return; + + /* if there's no match, attempt to load */ + auto it = summary_cache->find(modpath); + if (it == summary_cache->end()) + { + std::string path(modpath); + path = path.replace(0, strlen("$libdir"), std::string(pkglib_path) + "/bitcode"); + path += ".index.bc"; + (*summary_cache)[modpath] = llvm_load_summary(path); + it = summary_cache->find(modpath); + } + + Assert(it != summary_cache->end()); + + /* if the entry isn't NULL, it's validly loaded */ + if (it->second) + searchpath.push_back(it->second.get()); +} + +/* + * Search for all references for functions hashing to guid in the search path, + * and return them in search path order. 
+ */ +static llvm::SmallVector +summaries_for_guid(const InlineSearchPath& path, llvm::GlobalValue::GUID guid) +{ + llvm::SmallVector matches; + + for (auto index : path) + { +#if LLVM_VERSION_MAJOR > 4 + llvm::ValueInfo funcVI = index->getValueInfo(guid); + + /* if index doesn't know function, we don't have a body, continue */ + if (funcVI) + for (auto &gv : funcVI.getSummaryList()) + matches.push_back(gv.get()); +#else + const llvm::const_gvsummary_iterator &I = + index->findGlobalValueSummaryList(guid); + if (I != index->end()) + { + for (auto &gv : I->second) + matches.push_back(gv.get()); + } +#endif + } + + return matches; +} + +/* + * Create inline wrapper with the name Name, redirecting the call to F. + */ +static llvm::Function* +create_redirection_function(std::unique_ptr &importMod, + llvm::Function *F, + llvm::StringRef Name) +{ + typedef llvm::GlobalValue::LinkageTypes LinkageTypes; + + llvm::LLVMContext &Context = F->getContext(); + llvm::IRBuilder<> Builder(Context); + llvm::Function *AF; + llvm::BasicBlock *BB; + llvm::CallInst *fwdcall; + llvm::Attribute inlineAttribute; + + AF = llvm::Function::Create(F->getFunctionType(), + LinkageTypes::AvailableExternallyLinkage, + Name, importMod.get()); + BB = llvm::BasicBlock::Create(Context, "entry", AF); + + Builder.SetInsertPoint(BB); + fwdcall = Builder.CreateCall(F, &*AF->arg_begin()); + inlineAttribute = llvm::Attribute::get(Context, + llvm::Attribute::AlwaysInline); + fwdcall->addAttribute(~0U, inlineAttribute); + Builder.CreateRet(fwdcall); + + return AF; +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 52c21e6870..a19f5d0c02 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -544,6 +544,9 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) if (jit_optimize_above_cost >= 0 && top_plan->total_cost > jit_optimize_above_cost) result->jitFlags |= PGJIT_OPT3; + if 
(jit_inline_above_cost >= 0 &&
+			top_plan->total_cost > jit_inline_above_cost)
+			result->jitFlags |= PGJIT_INLINE;
 
 		/*
 		 * Decide which operations should be JITed.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index d075cb139a..4ffc8451ca 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3117,6 +3117,16 @@ static struct config_real ConfigureNamesReal[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"jit_inline_above_cost", PGC_USERSET, QUERY_TUNING_COST,
+			gettext_noop("Perform JIT inlining if query is more expensive."),
+			gettext_noop("-1 disables inlining.")
+		},
+		&jit_inline_above_cost,
+		500000, -1, DBL_MAX,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"cursor_tuple_fraction", PGC_USERSET, QUERY_TUNING_OTHER,
 			gettext_noop("Sets the planner's estimate of the fraction of "
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 4b692dc3e5..66d0938827 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -323,6 +323,9 @@
 					# and query more expensive, -1 disables
 #jit_optimize_above_cost = 500000	# optimize JITed functions if query is
 					# more expensive, -1 disables
+#jit_inline_above_cost = 500000		# attempt to inline operators and
+					# functions if query is more expensive,
+					# -1 disables
 
 #min_parallel_table_scan_size = 8MB
 #min_parallel_index_scan_size = 512kB
diff --git a/src/include/jit/jit.h b/src/include/jit/jit.h
index efcd6a52cf..85d234ff3b 100644
--- a/src/include/jit/jit.h
+++ b/src/include/jit/jit.h
@@ -19,7 +19,7 @@
 #define PGJIT_NONE     0
 #define PGJIT_PERFORM  1 << 0
 #define PGJIT_OPT3     1 << 1
-/* reserved for PGJIT_INLINE */
+#define PGJIT_INLINE   (1 << 2)
 #define PGJIT_EXPR     1 << 3
 #define PGJIT_DEFORM   1 << 4
 
@@ -37,6 +37,9 @@ typedef struct JitContext
 	/* accumulated time to generate code */
 	instr_time	generation_counter;
 
+	/* accumulated time for inlining */
+	instr_time	inlining_counter;
+
 	/* accumulated time for
optimization */
 	instr_time	optimization_counter;
 
@@ -70,6 +73,7 @@ extern bool jit_expressions;
 extern bool jit_profiling_support;
 extern bool jit_tuple_deforming;
 extern double jit_above_cost;
+extern double jit_inline_above_cost;
 extern double jit_optimize_above_cost;
 
 
diff --git a/src/include/jit/llvmjit.h b/src/include/jit/llvmjit.h
index f6aed64d8d..dc8fa57f84 100644
--- a/src/include/jit/llvmjit.h
+++ b/src/include/jit/llvmjit.h
@@ -103,6 +103,7 @@ extern LLVMValueRef llvm_function_reference(LLVMJitContext *context,
 						LLVMModuleRef mod,
 						FunctionCallInfo fcinfo);
 
+extern void llvm_inline(LLVMModuleRef mod);
 
 /*
  ****************************************************************************
diff --git a/src/makefiles/pgxs.mk b/src/makefiles/pgxs.mk
index c27004ecfb..5beb1e6b3c 100644
--- a/src/makefiles/pgxs.mk
+++ b/src/makefiles/pgxs.mk
@@ -101,6 +101,10 @@ endif
 
 all: $(PROGRAM) $(DATA_built) $(SCRIPTS_built) $(addsuffix $(DLSUFFIX), $(MODULES)) $(addsuffix .control, $(EXTENSION))
 
+ifeq ($(with_llvm), yes)
+all: $(MODULES:%=%.bc) $(OBJS:.o=.bc)
+endif
+
 ifdef MODULE_big
 # shared library parameters
 NAME = $(MODULE_big)
@@ -123,6 +127,9 @@ ifneq (,$(DATA_TSEARCH))
 endif # DATA_TSEARCH
 ifdef MODULES
 	$(INSTALL_SHLIB) $(addsuffix $(DLSUFFIX), $(MODULES)) '$(DESTDIR)$(pkglibdir)/'
+ifeq ($(with_llvm), yes)
+	$(foreach mod, $(MODULES), $(call install_llvm_module,$(mod),$(mod).bc))
+endif # with_llvm
 endif # MODULES
 ifdef DOCS
 ifdef docdir
@@ -138,8 +145,11 @@ endif # SCRIPTS
 ifdef SCRIPTS_built
 	$(INSTALL_SCRIPT) $(SCRIPTS_built) '$(DESTDIR)$(bindir)/'
 endif # SCRIPTS_built
-
 ifdef MODULE_big
+ifeq ($(with_llvm), yes)
+	$(call install_llvm_module,$(MODULE_big),$(OBJS))
+endif # with_llvm
+
 install: install-lib
 endif # MODULE_big
 
@@ -183,7 +193,10 @@ ifneq (,$(DATA_TSEARCH))
 endif
 ifdef MODULES
 	rm -f $(addprefix '$(DESTDIR)$(pkglibdir)'/, $(addsuffix $(DLSUFFIX), $(MODULES)))
-endif
+ifeq ($(with_llvm), yes)
+	$(foreach mod, $(MODULES), $(call
uninstall_llvm_module,$(mod)))
+endif # with_llvm
+endif # MODULES
 ifdef DOCS
 	rm -f $(addprefix '$(DESTDIR)$(docdir)/$(docmoduledir)'/, $(DOCS))
 endif
@@ -198,13 +211,18 @@ ifdef SCRIPTS_built
 endif
 
 ifdef MODULE_big
+ifeq ($(with_llvm), yes)
+	$(call uninstall_llvm_module,$(MODULE_big))
+endif # with_llvm
+
 uninstall: uninstall-lib
 endif # MODULE_big
 
 
 clean:
 ifdef MODULES
-	rm -f $(addsuffix $(DLSUFFIX), $(MODULES)) $(addsuffix .o, $(MODULES)) $(if $(PGFILEDESC),$(WIN32RES))
+	rm -f $(addsuffix $(DLSUFFIX), $(MODULES)) $(addsuffix .o, $(MODULES)) $(if $(PGFILEDESC),$(WIN32RES)) \
+	    $(MODULES:%=%.bc)
 endif
 ifdef DATA_built
 	rm -f $(DATA_built)
@@ -216,7 +234,7 @@ ifdef PROGRAM
 	rm -f $(PROGRAM)$(X)
 endif
 ifdef OBJS
-	rm -f $(OBJS)
+	rm -f $(OBJS) $(OBJS:.o=.bc)
 endif
 ifdef EXTRA_CLEAN
 	rm -rf $(EXTRA_CLEAN)
-- 
2.40.0