/*------------------------------------------------------------------------- * * postgres.c * POSTGRES C Backend Interface * * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.329 2003/05/02 21:59:31 momjian Exp $ * * NOTES * this is the "main" module of the postgres backend and * hence the main module of the "traffic cop". * *------------------------------------------------------------------------- */ #include "postgres.h" #include #include #include #include #include #include #include #if HAVE_SYS_SELECT_H #include #endif #ifdef HAVE_GETOPT_H #include #endif #include "access/xlog.h" #include "commands/async.h" #include "commands/trigger.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "libpq/pqsignal.h" #include "miscadmin.h" #include "nodes/print.h" #include "optimizer/cost.h" #include "optimizer/planner.h" #include "parser/analyze.h" #include "parser/parser.h" #include "rewrite/rewriteHandler.h" #include "storage/freespace.h" #include "storage/ipc.h" #include "storage/proc.h" #include "tcop/fastpath.h" #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "tcop/utility.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/ps_status.h" #include "mb/pg_wchar.h" #include "pgstat.h" extern int optind; extern char *optarg; /* ---------------- * global variables * ---------------- */ const char *debug_query_string; /* for pgmonitor and * log_min_error_statement */ /* Note: whereToSendOutput is initialized for the bootstrap/standalone case */ CommandDest whereToSendOutput = Debug; /* note: these declarations had better match tcopprot.h */ sigjmp_buf Warn_restart; bool Warn_restart_ready = false; bool InError = false; extern bool autocommit; static bool EchoQuery = false; /* default don't echo */ /* * Flag to mark SIGHUP. Whenever the main loop comes around it * will reread the configuration file. (Better than doing the * reading in the signal handler, ey?) */ static volatile bool got_SIGHUP = false; /* ---------------- * people who want to use EOF should #define DONTUSENEWLINE in * tcop/tcopdebug.h * ---------------- */ #ifndef TCOP_DONTUSENEWLINE int UseNewLine = 1; /* Use newlines query delimiters (the * default) */ #else int UseNewLine = 0; /* Use EOF as query delimiters */ #endif /* TCOP_DONTUSENEWLINE */ /* ** Flags for expensive function optimization -- JMH 3/9/92 */ int XfuncMode = 0; /* ---------------------------------------------------------------- * decls for routines only used in this file * ---------------------------------------------------------------- */ static int InteractiveBackend(StringInfo inBuf); static int SocketBackend(StringInfo inBuf); static int ReadCommand(StringInfo inBuf); static void start_xact_command(void); static void finish_xact_command(bool forceCommit); static void SigHupHandler(SIGNAL_ARGS); static void FloatExceptionHandler(SIGNAL_ARGS); /* ---------------------------------------------------------------- * routines to obtain user input * ---------------------------------------------------------------- */ /* ---------------- * InteractiveBackend() is called for user interactive connections * * the string entered by the user is placed in its parameter inBuf, * and we act like a Q message was received. * * EOF is returned if end-of-file input is seen; time to shut down. * ---------------- */ static int InteractiveBackend(StringInfo inBuf) { int c; /* character read from getc() */ bool end = false; /* end-of-input flag */ bool backslashSeen = false; /* have we seen a \ ? */ /* * display a prompt and obtain input from the user */ printf("backend> "); fflush(stdout); /* Reset inBuf to empty */ inBuf->len = 0; inBuf->data[0] = '\0'; inBuf->cursor = 0; for (;;) { if (UseNewLine) { /* * if we are using \n as a delimiter, then read characters * until the \n. */ while ((c = getc(stdin)) != EOF) { if (c == '\n') { if (backslashSeen) { /* discard backslash from inBuf */ inBuf->data[--inBuf->len] = '\0'; backslashSeen = false; continue; } else { /* keep the newline character */ appendStringInfoChar(inBuf, '\n'); break; } } else if (c == '\\') backslashSeen = true; else backslashSeen = false; appendStringInfoChar(inBuf, (char) c); } if (c == EOF) end = true; } else { /* * otherwise read characters until EOF. */ while ((c = getc(stdin)) != EOF) appendStringInfoChar(inBuf, (char) c); if (inBuf->len == 0) end = true; } if (end) return EOF; /* * otherwise we have a user query so process it. */ break; } /* Add '\0' to make it look the same as message case. */ appendStringInfoChar(inBuf, (char) '\0'); /* * if the query echo flag was given, print the query.. */ if (EchoQuery) printf("statement: %s\n", inBuf->data); fflush(stdout); return 'Q'; } /* ---------------- * SocketBackend() Is called for frontend-backend connections * * Returns the message type code, and loads message body data into inBuf. * * EOF is returned if the connection is lost. * ---------------- */ static int SocketBackend(StringInfo inBuf) { int qtype; /* * Get message type code from the frontend. */ qtype = pq_getbyte(); if (qtype == EOF) /* frontend disconnected */ { elog(COMMERROR, "unexpected EOF on client connection"); return qtype; } /* * Validate message type code before trying to read body; if we have * lost sync, better to say "command unknown" than to run out of memory * because we used garbage as a length word. */ switch (qtype) { case 'Q': /* simple query */ if (PG_PROTOCOL_MAJOR(FrontendProtocol) < 3) { /* old style without length word; convert */ if (pq_getstring(inBuf)) { elog(COMMERROR, "unexpected EOF on client connection"); return EOF; } } break; case 'F': /* fastpath function call */ break; case 'X': /* terminate */ break; case 'd': /* copy data */ case 'c': /* copy done */ case 'f': /* copy fail */ /* Accept but ignore these messages, per protocol spec */ break; default: /* * Otherwise we got garbage from the frontend. We treat this * as fatal because we have probably lost message boundary sync, * and there's no good way to recover. */ elog(FATAL, "Socket command type %c unknown", qtype); break; } /* * In protocol version 3, all frontend messages have a length word * next after the type code; we can read the message contents * independently of the type. */ if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3) { if (pq_getmessage(inBuf, 0)) return EOF; /* suitable message already logged */ } return qtype; } /* ---------------- * ReadCommand reads a command from either the frontend or * standard input, places it in inBuf, and returns the * message type code (first byte of the message). * EOF is returned if end of file. * ---------------- */ static int ReadCommand(StringInfo inBuf) { int result; if (IsUnderPostmaster) result = SocketBackend(inBuf); else result = InteractiveBackend(inBuf); return result; } /* * Parse a query string and pass it through the rewriter. * * A list of Query nodes is returned, since the string might contain * multiple queries and/or the rewriter might expand one query to several. * * NOTE: this routine is no longer used for processing interactive queries, * but it is still needed for parsing of SQL function bodies. */ List * pg_parse_and_rewrite(const char *query_string, /* string to execute */ Oid *paramTypes, /* parameter types */ int numParams) /* number of parameters */ { List *raw_parsetree_list; List *querytree_list; List *list_item; /* * (1) parse the request string into a list of raw parse trees. */ raw_parsetree_list = pg_parse_query(query_string); /* * (2) Do parse analysis and rule rewrite. */ querytree_list = NIL; foreach(list_item, raw_parsetree_list) { Node *parsetree = (Node *) lfirst(list_item); querytree_list = nconc(querytree_list, pg_analyze_and_rewrite(parsetree, paramTypes, numParams)); } return querytree_list; } /* * Do raw parsing (only). * * A list of parsetrees is returned, since there might be multiple * commands in the given string. * * NOTE: for interactive queries, it is important to keep this routine * separate from the analysis & rewrite stages. Analysis and rewriting * cannot be done in an aborted transaction, since they require access to * database tables. So, we rely on the raw parser to determine whether * we've seen a COMMIT or ABORT command; when we are in abort state, other * commands are not processed any further than the raw parse stage. */ List * pg_parse_query(const char *query_string) { List *raw_parsetree_list; if (log_statement) elog(LOG, "query: %s", query_string); if (log_parser_stats) ResetUsage(); raw_parsetree_list = raw_parser(query_string); if (log_parser_stats) ShowUsage("PARSER STATISTICS"); return raw_parsetree_list; } /* * Given a raw parsetree (gram.y output), and optionally information about * types of parameter symbols ($n), perform parse analysis and rule rewriting. * * A list of Query nodes is returned, since either the analyzer or the * rewriter might expand one query to several. * * NOTE: for reasons mentioned above, this must be separate from raw parsing. */ List * pg_analyze_and_rewrite(Node *parsetree, Oid *paramTypes, int numParams) { List *querytree_list; List *list_item; Query *querytree; List *new_list; /* * (1) Perform parse analysis. */ if (log_parser_stats) ResetUsage(); querytree_list = parse_analyze(parsetree, paramTypes, numParams); if (log_parser_stats) { ShowUsage("PARSE ANALYSIS STATISTICS"); ResetUsage(); } /* * (2) Rewrite the queries, as necessary * * rewritten queries are collected in new_list. Note there may be more * or fewer than in the original list. */ new_list = NIL; foreach(list_item, querytree_list) { querytree = (Query *) lfirst(list_item); if (Debug_print_parse) elog_node_display(LOG, "parse tree", querytree, Debug_pretty_print); if (querytree->commandType == CMD_UTILITY) { /* don't rewrite utilities, just dump 'em into new_list */ new_list = lappend(new_list, querytree); } else { /* rewrite regular queries */ List *rewritten = QueryRewrite(querytree); new_list = nconc(new_list, rewritten); } } querytree_list = new_list; if (log_parser_stats) ShowUsage("REWRITER STATISTICS"); #ifdef COPY_PARSE_PLAN_TREES /* * Optional debugging check: pass querytree output through * copyObject() */ new_list = (List *) copyObject(querytree_list); /* This checks both copyObject() and the equal() routines... */ if (!equal(new_list, querytree_list)) elog(WARNING, "pg_analyze_and_rewrite: copyObject failed on parse tree"); else querytree_list = new_list; #endif if (Debug_print_rewritten) elog_node_display(LOG, "rewritten parse tree", querytree_list, Debug_pretty_print); return querytree_list; } /* Generate a plan for a single already-rewritten query. */ Plan * pg_plan_query(Query *querytree) { Plan *plan; /* Utility commands have no plans. */ if (querytree->commandType == CMD_UTILITY) return NULL; if (log_planner_stats) ResetUsage(); /* call the optimizer */ plan = planner(querytree, false, 0); if (log_planner_stats) ShowUsage("PLANNER STATISTICS"); #ifdef COPY_PARSE_PLAN_TREES /* Optional debugging check: pass plan output through copyObject() */ { Plan *new_plan = (Plan *) copyObject(plan); /* * equal() currently does not have routines to compare Plan nodes, * so don't try to test equality here. Perhaps fix someday? */ #ifdef NOT_USED /* This checks both copyObject() and the equal() routines... */ if (!equal(new_plan, plan)) elog(WARNING, "pg_plan_query: copyObject failed on plan tree"); else #endif plan = new_plan; } #endif /* * Print plan if debugging. */ if (Debug_print_plan) elog_node_display(LOG, "plan", plan, Debug_pretty_print); return plan; } /* * Generate plans for a list of already-rewritten queries. * * If needSnapshot is TRUE, we haven't yet set a snapshot for the current * query. A snapshot must be set before invoking the planner, since it * might try to evaluate user-defined functions. But we must not set a * snapshot if the list contains only utility statements, because some * utility statements depend on not having frozen the snapshot yet. * (We assume that such statements cannot appear together with plannable * statements in the rewriter's output.) */ List * pg_plan_queries(List *querytrees, bool needSnapshot) { List *plan_list = NIL; List *query_list; foreach(query_list, querytrees) { Query *query = (Query *) lfirst(query_list); Plan *plan; if (query->commandType == CMD_UTILITY) { /* Utility commands have no plans. */ plan = NULL; } else { if (needSnapshot) { SetQuerySnapshot(); needSnapshot = false; } plan = pg_plan_query(query); } plan_list = lappend(plan_list, plan); } return plan_list; } /* * exec_simple_query() * * Execute a "simple Query" protocol message. */ static void exec_simple_query(const char *query_string, /* string to execute */ CommandDest dest) /* where results should go */ { bool xact_started; MemoryContext oldcontext; List *parsetree_list, *parsetree_item; struct timeval start_t, stop_t; bool save_log_duration = log_duration; bool save_log_statement_stats = log_statement_stats; /* * Report query to various monitoring facilities. */ debug_query_string = query_string; pgstat_report_activity(query_string); /* * We use save_log_duration so "SET log_duration = true" doesn't * report incorrect time because gettimeofday() wasn't called. * Similarly, log_statement_stats has to be captured once. */ if (save_log_duration) gettimeofday(&start_t, NULL); if (save_log_statement_stats) ResetUsage(); /* * Start up a transaction command. All queries generated by the * query_string will be in this same command block, *unless* we find a * BEGIN/COMMIT/ABORT statement; we have to force a new xact command * after one of those, else bad things will happen in xact.c. (Note * that this will normally change current memory context.) */ start_xact_command(); xact_started = true; /* * Switch to appropriate context for constructing parsetrees. */ oldcontext = MemoryContextSwitchTo(MessageContext); /* * Do basic parsing of the query or queries (this should be safe even * if we are in aborted transaction state!) */ parsetree_list = pg_parse_query(query_string); /* * Switch back to transaction context to enter the loop. */ MemoryContextSwitchTo(oldcontext); /* * Run through the raw parsetree(s) and process each one. */ foreach(parsetree_item, parsetree_list) { Node *parsetree = (Node *) lfirst(parsetree_item); const char *commandTag; char completionTag[COMPLETION_TAG_BUFSIZE]; List *querytree_list, *plantree_list; Portal portal; /* * Get the command name for use in status display (it also becomes the * default completion tag, down inside PortalRun). Set ps_status and * do any special start-of-SQL-command processing needed by the * destination. */ commandTag = CreateCommandTag(parsetree); set_ps_display(commandTag); BeginCommand(commandTag, dest); /* * If we are in an aborted transaction, reject all commands except * COMMIT/ABORT. It is important that this test occur before we * try to do parse analysis, rewrite, or planning, since all those * phases try to do database accesses, which may fail in abort * state. (It might be safe to allow some additional utility * commands in this state, but not many...) */ if (IsAbortedTransactionBlockState()) { bool allowit = false; if (IsA(parsetree, TransactionStmt)) { TransactionStmt *stmt = (TransactionStmt *) parsetree; if (stmt->kind == TRANS_STMT_COMMIT || stmt->kind == TRANS_STMT_ROLLBACK) allowit = true; } if (!allowit) elog(ERROR, "current transaction is aborted, " "queries ignored until end of transaction block"); } /* Make sure we are in a transaction command */ if (!xact_started) { start_xact_command(); xact_started = true; } /* If we got a cancel signal in parsing or prior command, quit */ CHECK_FOR_INTERRUPTS(); /* * OK to analyze, rewrite, and plan this query. * * Switch to appropriate context for constructing querytrees (again, * these must outlive the execution context). */ oldcontext = MemoryContextSwitchTo(MessageContext); querytree_list = pg_analyze_and_rewrite(parsetree, NULL, 0); plantree_list = pg_plan_queries(querytree_list, true); /* If we got a cancel signal in analysis or planning, quit */ CHECK_FOR_INTERRUPTS(); /* * Switch back to transaction context for execution. */ MemoryContextSwitchTo(oldcontext); /* * Create unnamed portal to run the query or queries in. * If there already is one, silently drop it. */ portal = CreatePortal("", true, true); PortalDefineQuery(portal, query_string, commandTag, querytree_list, plantree_list, MessageContext); /* * Run the portal to completion, and then drop it. */ PortalStart(portal, NULL); (void) PortalRun(portal, FETCH_ALL, dest, dest, completionTag); PortalDrop(portal, false); /* * If this was a transaction control statement or a variable * set/show/reset statement, commit it and arrange to start a * new xact command for the next command (if any). */ if (IsA(parsetree, TransactionStmt) || IsA(parsetree, VariableSetStmt) || IsA(parsetree, VariableShowStmt) || IsA(parsetree, VariableResetStmt)) { finish_xact_command(true); xact_started = false; } /* * If this is the last parsetree of the query string, close down * transaction statement before reporting command-complete. This * is so that any end-of-transaction errors are reported before * the command-complete message is issued, to avoid confusing * clients who will expect either a command-complete message or an * error, not one and then the other. But for compatibility with * historical Postgres behavior, we do not force a transaction * boundary between queries appearing in a single query string. */ else if (lnext(parsetree_item) == NIL || !autocommit) { finish_xact_command(false); xact_started = false; } else { /* * We need a CommandCounterIncrement after every query, * except those that start or end a transaction block. */ CommandCounterIncrement(); } /* * Tell client that we're done with this query. Note we emit * exactly one EndCommand report for each raw parsetree, thus one * for each SQL command the client sent, regardless of rewriting. * (But a command aborted by error will not send an EndCommand * report at all.) */ EndCommand(completionTag, dest); } /* end loop over parsetrees */ /* * If there were no parsetrees, return EmptyQueryResponse message. */ if (!parsetree_list) NullCommand(dest); /* * Close down transaction statement, if one is open. */ if (xact_started) finish_xact_command(false); /* * Finish up monitoring. */ if (save_log_duration) { gettimeofday(&stop_t, NULL); if (stop_t.tv_usec < start_t.tv_usec) { stop_t.tv_sec--; stop_t.tv_usec += 1000000; } elog(LOG, "duration: %ld.%06ld sec", (long) (stop_t.tv_sec - start_t.tv_sec), (long) (stop_t.tv_usec - start_t.tv_usec)); } if (save_log_statement_stats) ShowUsage("QUERY STATISTICS"); debug_query_string = NULL; } /* * Convenience routines for starting/committing a single command. */ static void start_xact_command(void) { elog(DEBUG1, "StartTransactionCommand"); StartTransactionCommand(false); /* Set statement timeout running, if any */ if (StatementTimeout > 0) enable_sig_alarm(StatementTimeout, true); } static void finish_xact_command(bool forceCommit) { /* Invoke IMMEDIATE constraint triggers */ DeferredTriggerEndQuery(); /* Cancel any active statement timeout before committing */ disable_sig_alarm(true); /* Now commit the command */ elog(DEBUG1, "CommitTransactionCommand"); CommitTransactionCommand(forceCommit); #ifdef SHOW_MEMORY_STATS /* Print mem stats at each commit for leak tracking */ if (ShowStats) MemoryContextStats(TopMemoryContext); #endif } /* -------------------------------- * signal handler routines used in PostgresMain() * -------------------------------- */ /* * quickdie() occurs when signalled SIGQUIT by the postmaster. * * Some backend has bought the farm, * so we need to stop what we're doing and exit. */ void quickdie(SIGNAL_ARGS) { PG_SETMASK(&BlockSig); elog(WARNING, "Message from PostgreSQL backend:" "\n\tThe Postmaster has informed me that some other backend" "\n\tdied abnormally and possibly corrupted shared memory." "\n\tI have rolled back the current transaction and am" "\n\tgoing to terminate your database system connection and exit." "\n\tPlease reconnect to the database system and repeat your query."); /* * DO NOT proc_exit() -- we're here because shared memory may be * corrupted, so we don't want to try to clean up our transaction. * Just nail the windows shut and get out of town. * * Note we do exit(1) not exit(0). This is to force the postmaster into * a system reset cycle if some idiot DBA sends a manual SIGQUIT to a * random backend. This is necessary precisely because we don't clean * up our shared memory state. */ exit(1); } /* * Shutdown signal from postmaster: abort transaction and exit * at soonest convenient time */ void die(SIGNAL_ARGS) { int save_errno = errno; /* Don't joggle the elbow of proc_exit */ if (!proc_exit_inprogress) { InterruptPending = true; ProcDiePending = true; /* * If it's safe to interrupt, and we're waiting for input or a * lock, service the interrupt immediately */ if (ImmediateInterruptOK && InterruptHoldoffCount == 0 && CritSectionCount == 0) { /* bump holdoff count to make ProcessInterrupts() a no-op */ /* until we are done getting ready for it */ InterruptHoldoffCount++; DisableNotifyInterrupt(); /* Make sure CheckDeadLock won't run while shutting down... */ LockWaitCancel(); InterruptHoldoffCount--; ProcessInterrupts(); } } errno = save_errno; } /* * Timeout or shutdown signal from postmaster during client authentication. * Simply exit(0). * * XXX: possible future improvement: try to send a message indicating * why we are disconnecting. Problem is to be sure we don't block while * doing so, nor mess up the authentication message exchange. */ void authdie(SIGNAL_ARGS) { exit(0); } /* * Query-cancel signal from postmaster: abort current transaction * at soonest convenient time */ static void StatementCancelHandler(SIGNAL_ARGS) { int save_errno = errno; /* * Don't joggle the elbow of proc_exit, nor an already-in-progress * abort */ if (!proc_exit_inprogress && !InError) { InterruptPending = true; QueryCancelPending = true; /* * If it's safe to interrupt, and we're waiting for a lock, * service the interrupt immediately. No point in interrupting if * we're waiting for input, however. */ if (ImmediateInterruptOK && InterruptHoldoffCount == 0 && CritSectionCount == 0) { /* bump holdoff count to make ProcessInterrupts() a no-op */ /* until we are done getting ready for it */ InterruptHoldoffCount++; if (LockWaitCancel()) { DisableNotifyInterrupt(); InterruptHoldoffCount--; ProcessInterrupts(); } else InterruptHoldoffCount--; } } errno = save_errno; } /* signal handler for floating point exception */ static void FloatExceptionHandler(SIGNAL_ARGS) { elog(ERROR, "floating point exception!" " The last floating point operation either exceeded legal ranges" " or was a divide by zero"); } /* SIGHUP: set flag to re-read config file at next convenient time */ static void SigHupHandler(SIGNAL_ARGS) { got_SIGHUP = true; } /* * ProcessInterrupts: out-of-line portion of CHECK_FOR_INTERRUPTS() macro * * If an interrupt condition is pending, and it's safe to service it, * then clear the flag and accept the interrupt. Called only when * InterruptPending is true. */ void ProcessInterrupts(void) { /* OK to accept interrupt now? */ if (InterruptHoldoffCount != 0 || CritSectionCount != 0) return; InterruptPending = false; if (ProcDiePending) { ProcDiePending = false; QueryCancelPending = false; /* ProcDie trumps QueryCancel */ ImmediateInterruptOK = false; /* not idle anymore */ DisableNotifyInterrupt(); elog(FATAL, "This connection has been terminated by the administrator."); } if (QueryCancelPending) { QueryCancelPending = false; ImmediateInterruptOK = false; /* not idle anymore */ DisableNotifyInterrupt(); elog(ERROR, "Query was canceled."); } /* If we get here, do nothing (probably, QueryCancelPending was reset) */ } static void usage(char *progname) { printf("%s is the PostgreSQL stand-alone backend. It is not\nintended to be used by normal users.\n\n", progname); printf("Usage:\n %s [OPTION]... [DBNAME]\n\n", progname); printf("Options:\n"); #ifdef USE_ASSERT_CHECKING printf(" -A 1|0 enable/disable run-time assert checking\n"); #endif printf(" -B NBUFFERS number of shared buffers (default %d)\n", DEF_NBUFFERS); printf(" -c NAME=VALUE set run-time parameter\n"); printf(" -d 0-5 debugging level (0 is off)\n"); printf(" -D DATADIR database directory\n"); printf(" -e use European date format\n"); printf(" -E echo query before execution\n"); printf(" -F turn fsync off\n"); printf(" -N do not use newline as interactive query delimiter\n"); printf(" -o FILENAME send stdout and stderr to given file\n"); printf(" -P disable system indexes\n"); printf(" -s show statistics after each query\n"); printf(" -S SORT-MEM set amount of memory for sorts (in kbytes)\n"); printf(" --help show this help, then exit\n"); printf(" --version output version information, then exit\n"); printf("\nDeveloper options:\n"); printf(" -f s|i|n|m|h forbid use of some plan types\n"); printf(" -i do not execute queries\n"); printf(" -O allow system table structure changes\n"); printf(" -t pa|pl|ex show timings after each query\n"); printf(" -W NUM wait NUM seconds to allow attach from a debugger\n"); printf("\nReport bugs to .\n"); } /* ---------------------------------------------------------------- * PostgresMain * postgres main loop -- all backends, interactive or otherwise start here * * argc/argv are the command line arguments to be used. (When being forked * by the postmaster, these are not the original argv array of the process.) * username is the (possibly authenticated) PostgreSQL user name to be used * for the session. * ---------------------------------------------------------------- */ int PostgresMain(int argc, char *argv[], const char *username) { int flag; const char *DBName = NULL; char *potential_DataDir = NULL; bool secure; int errs = 0; int debug_flag = 0; GucContext ctx; GucSource gucsource; char *tmp; int firstchar; StringInfo input_message; bool send_rfq; /* * Catch standard options before doing much else. This even works on * systems without getopt_long. */ if (!IsUnderPostmaster && argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { usage(argv[0]); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { puts("postgres (PostgreSQL) " PG_VERSION); exit(0); } } /* * initialize globals (already done if under postmaster, but not if * standalone; cheap enough to do over) */ MyProcPid = getpid(); /* * Fire up essential subsystems: error and memory management * * If we are running under the postmaster, this is done already. */ if (!IsUnderPostmaster) MemoryContextInit(); set_ps_display("startup"); SetProcessingMode(InitProcessing); /* * Set default values for command-line options. */ Noversion = false; EchoQuery = false; if (!IsUnderPostmaster) { InitializeGUCOptions(); potential_DataDir = getenv("PGDATA"); } /* ---------------- * parse command line arguments * * There are now two styles of command line layout for the backend: * * For interactive use (not started from postmaster) the format is * postgres [switches] [databasename] * If the databasename is omitted it is taken to be the user name. * * When started from the postmaster, the format is * postgres [secure switches] -p databasename [insecure switches] * Switches appearing after -p came from the client (via "options" * field of connection request). For security reasons we restrict * what these switches can do. * ---------------- */ /* all options are allowed until '-p' */ secure = true; ctx = PGC_POSTMASTER; gucsource = PGC_S_ARGV; /* initial switches came from command line */ while ((flag = getopt(argc, argv, "A:B:c:CD:d:Eef:FiNOPo:p:S:st:v:W:x:-:")) != -1) switch (flag) { case 'A': #ifdef USE_ASSERT_CHECKING SetConfigOption("debug_assertions", optarg, ctx, gucsource); #else elog(WARNING, "Assert checking is not compiled in"); #endif break; case 'B': /* * specify the size of buffer pool */ SetConfigOption("shared_buffers", optarg, ctx, gucsource); break; case 'C': /* * don't print version string */ Noversion = true; break; case 'D': /* PGDATA directory */ if (secure) potential_DataDir = optarg; break; case 'd': /* debug level */ { debug_flag = atoi(optarg); /* Set server debugging level. */ if (atoi(optarg) != 0) { char *debugstr = palloc(strlen("debug") + strlen(optarg) + 1); sprintf(debugstr, "debug%s", optarg); SetConfigOption("log_min_messages", debugstr, ctx, gucsource); pfree(debugstr); } else /* * -d0 allows user to prevent postmaster debug * from propagating to backend. It would be nice * to set it to the postgresql.conf value here. */ SetConfigOption("log_min_messages", "notice", ctx, gucsource); } break; case 'E': /* * E - echo the query the user entered */ EchoQuery = true; break; case 'e': /* * Use european date formats. */ SetConfigOption("datestyle", "euro", ctx, gucsource); break; case 'F': /* * turn off fsync */ SetConfigOption("fsync", "false", ctx, gucsource); break; case 'f': /* * f - forbid generation of certain plans */ tmp = NULL; switch (optarg[0]) { case 's': /* seqscan */ tmp = "enable_seqscan"; break; case 'i': /* indexscan */ tmp = "enable_indexscan"; break; case 't': /* tidscan */ tmp = "enable_tidscan"; break; case 'n': /* nestloop */ tmp = "enable_nestloop"; break; case 'm': /* mergejoin */ tmp = "enable_mergejoin"; break; case 'h': /* hashjoin */ tmp = "enable_hashjoin"; break; default: errs++; } if (tmp) SetConfigOption(tmp, "false", ctx, gucsource); break; case 'N': /* * N - Don't use newline as a query delimiter */ UseNewLine = 0; break; case 'O': /* * allow system table structure modifications */ if (secure) /* XXX safe to allow from client??? */ allowSystemTableMods = true; break; case 'P': /* * ignore system indexes */ if (secure) /* XXX safe to allow from client??? */ IgnoreSystemIndexes(true); break; case 'o': /* * o - send output (stdout and stderr) to the given file */ if (secure) StrNCpy(OutputFileName, optarg, MAXPGPATH); break; case 'p': /* * p - special flag passed if backend was forked by a * postmaster. */ if (secure) { DBName = strdup(optarg); secure = false; /* subsequent switches are NOT * secure */ ctx = PGC_BACKEND; gucsource = PGC_S_CLIENT; } break; case 'S': /* * S - amount of sort memory to use in 1k bytes */ SetConfigOption("sort_mem", optarg, ctx, gucsource); break; case 's': /* * s - report usage statistics (timings) after each query */ SetConfigOption("show_statement_stats", "true", ctx, gucsource); break; case 't': /* --------------- * tell postgres to report usage statistics (timings) for * each query * * -tpa[rser] = print stats for parser time of each query * -tpl[anner] = print stats for planner time of each query * -te[xecutor] = print stats for executor time of each query * caution: -s can not be used together with -t. * ---------------- */ tmp = NULL; switch (optarg[0]) { case 'p': if (optarg[1] == 'a') tmp = "log_parser_stats"; else if (optarg[1] == 'l') tmp = "log_planner_stats"; else errs++; break; case 'e': tmp = "show_executor_stats"; break; default: errs++; break; } if (tmp) SetConfigOption(tmp, "true", ctx, gucsource); break; case 'v': if (secure) FrontendProtocol = (ProtocolVersion) atoi(optarg); break; case 'W': /* * wait N seconds to allow attach from a debugger */ sleep(atoi(optarg)); break; case 'x': #ifdef NOT_USED /* planner/xfunc.h */ /* * control joey hellerstein's expensive function * optimization */ if (XfuncMode != 0) { elog(WARNING, "only one -x flag is allowed"); errs++; break; } if (strcmp(optarg, "off") == 0) XfuncMode = XFUNC_OFF; else if (strcmp(optarg, "nor") == 0) XfuncMode = XFUNC_NOR; else if (strcmp(optarg, "nopull") == 0) XfuncMode = XFUNC_NOPULL; else if (strcmp(optarg, "nopm") == 0) XfuncMode = XFUNC_NOPM; else if (strcmp(optarg, "pullall") == 0) XfuncMode = XFUNC_PULLALL; else if (strcmp(optarg, "wait") == 0) XfuncMode = XFUNC_WAIT; else { elog(WARNING, "use -x {off,nor,nopull,nopm,pullall,wait}"); errs++; } #endif break; case 'c': case '-': { char *name, *value; ParseLongOption(optarg, &name, &value); if (!value) { if (flag == '-') elog(ERROR, "--%s requires argument", optarg); else elog(ERROR, "-c %s requires argument", optarg); } SetConfigOption(name, value, ctx, gucsource); free(name); if (value) free(value); break; } default: errs++; break; } /* * -d is not the same as setting * log_min_messages because it enables other * output options. */ if (debug_flag >= 1) SetConfigOption("log_connections", "true", ctx, gucsource); if (debug_flag >= 2) SetConfigOption("log_statement", "true", ctx, gucsource); if (debug_flag >= 3) SetConfigOption("debug_print_parse", "true", ctx, gucsource); if (debug_flag >= 4) SetConfigOption("debug_print_plan", "true", ctx, gucsource); if (debug_flag >= 5) SetConfigOption("debug_print_rewritten", "true", ctx, gucsource); /* * Process any additional GUC variable settings passed in startup packet. */ if (MyProcPort != NULL) { List *gucopts = MyProcPort->guc_options; while (gucopts) { char *name, *value; name = lfirst(gucopts); gucopts = lnext(gucopts); value = lfirst(gucopts); gucopts = lnext(gucopts); SetConfigOption(name, value, PGC_BACKEND, PGC_S_CLIENT); } } /* * Post-processing for command line options. */ if (log_statement_stats && (log_parser_stats || log_planner_stats || log_executor_stats)) { elog(WARNING, "Query statistics are disabled because parser, planner, or executor statistics are on."); SetConfigOption("show_statement_stats", "false", ctx, gucsource); } if (!IsUnderPostmaster) { if (!potential_DataDir) { fprintf(stderr, "%s does not know where to find the database system " "data. You must specify the directory that contains the " "database system either by specifying the -D invocation " "option or by setting the PGDATA environment variable.\n\n", argv[0]); proc_exit(1); } SetDataDir(potential_DataDir); } Assert(DataDir); /* * Set up signal handlers and masks. * * Note that postmaster blocked all signals before forking child process, * so there is no race condition whereby we might receive a signal * before we have set up the handler. * * Also note: it's best not to use any signals that are SIG_IGNored in * the postmaster. If such a signal arrives before we are able to * change the handler to non-SIG_IGN, it'll get dropped. Instead, * make a dummy handler in the postmaster to reserve the signal. (Of * course, this isn't an issue for signals that are locally generated, * such as SIGALRM and SIGPIPE.) */ pqsignal(SIGHUP, SigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, StatementCancelHandler); /* cancel current query */ pqsignal(SIGTERM, die); /* cancel current query and exit */ pqsignal(SIGQUIT, quickdie); /* hard crash time */ pqsignal(SIGALRM, handle_sig_alarm); /* timeout conditions */ /* * Ignore failure to write to frontend. Note: if frontend closes * connection, we will notice it and exit cleanly when control next * returns to outer loop. This seems safer than forcing exit in the * midst of output during who-knows-what operation... */ pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); /* this signal available for use */ pqsignal(SIGUSR2, Async_NotifyHandler); /* flush also sinval cache */ pqsignal(SIGFPE, FloatExceptionHandler); /* * Reset some signals that are accepted by postmaster but not by * backend */ pqsignal(SIGCHLD, SIG_DFL); /* system() requires this on some * platforms */ pqinitmask(); /* We allow SIGQUIT (quickdie) at all times */ #ifdef HAVE_SIGPROCMASK sigdelset(&BlockSig, SIGQUIT); #else BlockSig &= ~(sigmask(SIGQUIT)); #endif PG_SETMASK(&BlockSig); /* block everything except SIGQUIT */ if (IsUnderPostmaster) { /* noninteractive case: nothing should be left after switches */ if (errs || argc != optind || DBName == NULL) { elog(WARNING, "%s: invalid command line arguments\nTry -? for help.", argv[0]); proc_exit(0); /* not 1, that causes system-wide * restart... */ } BaseInit(); } else { /* interactive case: database name can be last arg on command line */ if (errs || argc - optind > 1) { elog(WARNING, "%s: invalid command line arguments\nTry -? for help.", argv[0]); proc_exit(1); } else if (argc - optind == 1) DBName = argv[optind]; else if ((DBName = username) == NULL) { elog(WARNING, "%s: user name undefined and no database specified", argv[0]); proc_exit(1); } /* * On some systems our dynloader code needs the executable's * pathname. (If under postmaster, this was done already.) */ if (FindExec(pg_pathname, argv[0], "postgres") < 0) elog(FATAL, "%s: could not locate executable, bailing out...", argv[0]); /* * Validate we have been given a reasonable-looking DataDir (if * under postmaster, assume postmaster did this already). */ ValidatePgVersion(DataDir); /* * Create lockfile for data directory. */ if (!CreateDataDirLockFile(DataDir, false)) proc_exit(1); XLOGPathInit(); BaseInit(); /* * Start up xlog for standalone backend, and register to have it * closed down at exit. */ StartupXLOG(); on_shmem_exit(ShutdownXLOG, 0); /* * Read any existing FSM cache file, and register to write one out * at exit. */ LoadFreeSpaceMap(); on_shmem_exit(DumpFreeSpaceMap, 0); } /* * Set up additional info. */ #ifdef CYR_RECODE SetCharSet(); #endif /* * General initialization. * * NOTE: if you are tempted to add code in this vicinity, consider * putting it inside InitPostgres() instead. In particular, anything * that involves database access should be there, not here. */ elog(DEBUG2, "InitPostgres"); InitPostgres(DBName, username); SetProcessingMode(NormalProcessing); /* * Send this backend's cancellation info to the frontend. */ if (whereToSendOutput == Remote && PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2) { StringInfoData buf; pq_beginmessage(&buf, 'K'); pq_sendint(&buf, (int32) MyProcPid, sizeof(int32)); pq_sendint(&buf, (int32) MyCancelKey, sizeof(int32)); pq_endmessage(&buf); /* Need not flush since ReadyForQuery will do it. */ } if (!IsUnderPostmaster) { puts("\nPOSTGRES backend interactive interface "); puts("$Revision: 1.329 $ $Date: 2003/05/02 21:59:31 $\n"); } /* * Create the memory context we will use in the main loop. * * MessageContext is reset once per iteration of the main loop, ie, upon * completion of processing of each command message from the client. */ MessageContext = AllocSetContextCreate(TopMemoryContext, "MessageContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* ---------- * Tell the statistics collector that we're alive and * to which database we belong. * ---------- */ pgstat_bestart(); /* * POSTGRES main processing loop begins here * * If an exception is encountered, processing resumes here so we abort * the current transaction and start a new one. */ if (sigsetjmp(Warn_restart, 1) != 0) { /* * NOTE: if you are tempted to add more code in this if-block, * consider the probability that it should be in * AbortTransaction() instead. * * Make sure we're not interrupted while cleaning up. Also forget * any pending QueryCancel request, since we're aborting anyway. * Force InterruptHoldoffCount to a known state in case we elog'd * from inside a holdoff section. */ ImmediateInterruptOK = false; QueryCancelPending = false; InterruptHoldoffCount = 1; CritSectionCount = 0; /* should be unnecessary, but... */ disable_sig_alarm(true); QueryCancelPending = false; /* again in case timeout occurred */ DisableNotifyInterrupt(); debug_query_string = NULL; /* * Make sure we are in a valid memory context during recovery. * * We use ErrorContext in hopes that it will have some free space * even if we're otherwise up against it... */ MemoryContextSwitchTo(ErrorContext); /* Do the recovery */ elog(DEBUG1, "AbortCurrentTransaction"); AbortCurrentTransaction(); /* * Now return to normal top-level context and clear ErrorContext * for next time. */ MemoryContextSwitchTo(TopMemoryContext); MemoryContextResetAndDeleteChildren(ErrorContext); CurrentPortal = NULL; PortalContext = NULL; QueryContext = NULL; /* * Clear flag to indicate that we got out of error recovery mode * successfully. (Flag was set in elog.c before longjmp().) */ InError = false; /* * Exit interrupt holdoff section we implicitly established above. */ RESUME_INTERRUPTS(); } Warn_restart_ready = true; /* we can now handle elog(ERROR) */ PG_SETMASK(&UnBlockSig); send_rfq = true; /* initially, or after error */ /* * Non-error queries loop here. */ for (;;) { /* * Release storage left over from prior query cycle, and create a * new query input buffer in the cleared MessageContext. */ MemoryContextSwitchTo(MessageContext); MemoryContextResetAndDeleteChildren(MessageContext); input_message = makeStringInfo(); /* * (1) tell the frontend we're ready for a new query. * * Note: this includes fflush()'ing the last of the prior output. */ if (send_rfq) { ReadyForQuery(whereToSendOutput); send_rfq = false; } /* ---------- * Tell the statistics collector what we've collected * so far. * ---------- */ pgstat_report_tabstat(); if (IsTransactionBlock()) { set_ps_display("idle in transaction"); pgstat_report_activity(" in transaction"); } else { set_ps_display("idle"); pgstat_report_activity(""); } /* * (2) deal with pending asynchronous NOTIFY from other backends, * and enable async.c's signal handler to execute NOTIFY directly. * Then set up other stuff needed before blocking for input. */ QueryCancelPending = false; /* forget any earlier CANCEL * signal */ EnableNotifyInterrupt(); /* Allow "die" interrupt to be processed while waiting */ ImmediateInterruptOK = true; /* and don't forget to detect one that already arrived */ QueryCancelPending = false; CHECK_FOR_INTERRUPTS(); /* * (3) read a command (loop blocks here) */ firstchar = ReadCommand(input_message); /* * (4) disable async signal conditions again. */ ImmediateInterruptOK = false; QueryCancelPending = false; /* forget any CANCEL signal */ DisableNotifyInterrupt(); /* * (5) check for any other interesting events that happened while * we slept. */ if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } /* * (6) process the command. */ switch (firstchar) { case 'Q': /* simple query */ { const char *query_string = pq_getmsgstring(input_message); exec_simple_query(query_string, whereToSendOutput); send_rfq = true; } break; case 'F': /* fastpath function call */ /* Tell the collector what we're doing */ pgstat_report_activity(" function call"); /* start an xact for this function invocation */ start_xact_command(); if (HandleFunctionRequest(input_message) == EOF) { /* lost frontend connection during F message input */ /* * Reset whereToSendOutput to prevent elog from * attempting to send any more messages to client. */ if (whereToSendOutput == Remote) whereToSendOutput = None; proc_exit(0); } /* commit the function-invocation transaction */ finish_xact_command(false); send_rfq = true; break; /* * 'X' means that the frontend is closing down the socket. * EOF means unexpected loss of frontend connection. * Either way, perform normal shutdown. */ case 'X': case EOF: /* * Reset whereToSendOutput to prevent elog from attempting * to send any more messages to client. */ if (whereToSendOutput == Remote) whereToSendOutput = None; /* * NOTE: if you are tempted to add more code here, DON'T! * Whatever you had in mind to do should be set up as an * on_proc_exit or on_shmem_exit callback, instead. * Otherwise it will fail to be called during other * backend-shutdown scenarios. */ proc_exit(0); case 'd': /* copy data */ case 'c': /* copy done */ case 'f': /* copy fail */ /* * Accept but ignore these messages, per protocol spec; * we probably got here because a COPY failed, and the * frontend is still sending data. */ break; default: elog(FATAL, "Socket command type %c unknown", firstchar); } #ifdef MEMORY_CONTEXT_CHECKING /* * Check all memory after each backend loop. This is a rather * weird place to do it, perhaps. */ MemoryContextCheck(TopMemoryContext); #endif } /* end of input-reading loop */ /* can't get here because the above loop never exits */ Assert(false); return 1; /* keep compiler quiet */ } #ifndef HAVE_GETRUSAGE #include "rusagestub.h" #else #include #endif /* HAVE_GETRUSAGE */ struct rusage Save_r; struct timeval Save_t; void ResetUsage(void) { getrusage(RUSAGE_SELF, &Save_r); gettimeofday(&Save_t, NULL); ResetBufferUsage(); /* ResetTupleCount(); */ } void ShowUsage(const char *title) { StringInfoData str; struct timeval user, sys; struct timeval elapse_t; struct rusage r; char *bufusage; getrusage(RUSAGE_SELF, &r); gettimeofday(&elapse_t, NULL); memcpy((char *) &user, (char *) &r.ru_utime, sizeof(user)); memcpy((char *) &sys, (char *) &r.ru_stime, sizeof(sys)); if (elapse_t.tv_usec < Save_t.tv_usec) { elapse_t.tv_sec--; elapse_t.tv_usec += 1000000; } if (r.ru_utime.tv_usec < Save_r.ru_utime.tv_usec) { r.ru_utime.tv_sec--; r.ru_utime.tv_usec += 1000000; } if (r.ru_stime.tv_usec < Save_r.ru_stime.tv_usec) { r.ru_stime.tv_sec--; r.ru_stime.tv_usec += 1000000; } /* * the only stats we don't show here are for memory usage -- i can't * figure out how to interpret the relevant fields in the rusage * struct, and they change names across o/s platforms, anyway. if you * can figure out what the entries mean, you can somehow extract * resident set size, shared text size, and unshared data and stack * sizes. */ initStringInfo(&str); appendStringInfo(&str, "! system usage stats:\n"); appendStringInfo(&str, "!\t%ld.%06ld elapsed %ld.%06ld user %ld.%06ld system sec\n", (long) (elapse_t.tv_sec - Save_t.tv_sec), (long) (elapse_t.tv_usec - Save_t.tv_usec), (long) (r.ru_utime.tv_sec - Save_r.ru_utime.tv_sec), (long) (r.ru_utime.tv_usec - Save_r.ru_utime.tv_usec), (long) (r.ru_stime.tv_sec - Save_r.ru_stime.tv_sec), (long) (r.ru_stime.tv_usec - Save_r.ru_stime.tv_usec)); appendStringInfo(&str, "!\t[%ld.%06ld user %ld.%06ld sys total]\n", (long) user.tv_sec, (long) user.tv_usec, (long) sys.tv_sec, (long) sys.tv_usec); /* BeOS has rusage but only has some fields, and not these... */ #if defined(HAVE_GETRUSAGE) appendStringInfo(&str, "!\t%ld/%ld [%ld/%ld] filesystem blocks in/out\n", r.ru_inblock - Save_r.ru_inblock, /* they only drink coffee at dec */ r.ru_oublock - Save_r.ru_oublock, r.ru_inblock, r.ru_oublock); appendStringInfo(&str, "!\t%ld/%ld [%ld/%ld] page faults/reclaims, %ld [%ld] swaps\n", r.ru_majflt - Save_r.ru_majflt, r.ru_minflt - Save_r.ru_minflt, r.ru_majflt, r.ru_minflt, r.ru_nswap - Save_r.ru_nswap, r.ru_nswap); appendStringInfo(&str, "!\t%ld [%ld] signals rcvd, %ld/%ld [%ld/%ld] messages rcvd/sent\n", r.ru_nsignals - Save_r.ru_nsignals, r.ru_nsignals, r.ru_msgrcv - Save_r.ru_msgrcv, r.ru_msgsnd - Save_r.ru_msgsnd, r.ru_msgrcv, r.ru_msgsnd); appendStringInfo(&str, "!\t%ld/%ld [%ld/%ld] voluntary/involuntary context switches\n", r.ru_nvcsw - Save_r.ru_nvcsw, r.ru_nivcsw - Save_r.ru_nivcsw, r.ru_nvcsw, r.ru_nivcsw); #endif /* HAVE_GETRUSAGE */ bufusage = ShowBufferUsage(); appendStringInfo(&str, "! buffer usage stats:\n%s", bufusage); pfree(bufusage); /* remove trailing newline */ if (str.data[str.len - 1] == '\n') str.data[--str.len] = '\0'; elog(LOG, "%s\n%s", title, str.data); pfree(str.data); }