From: Marko Kreen Date: Tue, 13 Mar 2007 15:31:43 +0000 (+0000) Subject: Initial revision X-Git-Tag: pgbouncer_1_0~1 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a0d2b294e0270f8a246e5b98f0700716c0672b0d;p=pgbouncer Initial revision --- a0d2b294e0270f8a246e5b98f0700716c0672b0d diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..dfceaba --- /dev/null +++ b/AUTHORS @@ -0,0 +1,4 @@ + +Marko Kreen - main coder +Martin Pihlak - head inquisitor + diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..5d264f7 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,16 @@ +PgBouncer - Lightweight connection pooler for PostgreSQL. + +Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..95cfe57 --- /dev/null +++ b/Makefile @@ -0,0 +1,123 @@ + +# sources +SRCS = client.c loader.c objects.c pooler.c proto.c sbuf.c server.c util.c \ + admin.c stats.c takeover.c md5.c janitor.c pktbuf.c main.c +HDRS = client.h loader.h objects.h pooler.h proto.h sbuf.h server.h util.h \ + admin.h stats.h takeover.h md5.h janitor.h pktbuf.h bouncer.h \ + list.h mbuf.h system.h + +# data & dirs to include in tgz +DATA = README etc/pgbouncer.ini Makefile config.mak.in config.h.in \ + configure configure.ac debian/packages debian/changelog +DIRS = etc src debian + +# keep autoconf stuff separate +-include config.mak + +# calculate full-path values +OBJS = $(SRCS:.c=.o) +hdrs = $(addprefix $(srcdir)/src/, $(HDRS)) +srcs = $(addprefix $(srcdir)/src/, $(SRCS)) +objs = $(addprefix $(builddir)/lib/, $(OBJS)) +FULL = $(PACKAGE_TARNAME)-$(PACKAGE_VERSION) +DISTFILES = $(DIRS) $(DATA) $(srcs) $(hdrs) + +# Quiet by default, 'make V=1' shows commands +V=0 +ifeq ($(V), 0) +Q = @ +E = @echo +else +Q = +E = @true +endif + +## actual targets now ## + +# command targets, never files (a stray file named e.g. 'clean' must not disable them) +.PHONY: all install tgz deb clean distclean realclean boot check + +# default target +all: pgbouncer + +# final executable +pgbouncer: config.mak $(objs) + $(E) " LD" $@ + $(Q) $(CC) -o $@ $(LDFLAGS) $(objs) $(LIBS) + +# objects depend on all the headers +$(builddir)/lib/%.o: $(srcdir)/src/%.c config.mak $(hdrs) + @mkdir -p $(builddir)/lib + $(E) " CC" $< + $(Q) $(CC) -c -o $@ $< $(DEFS) $(CFLAGS) $(CPPFLAGS) + +# install binary and other stuff +install: pgbouncer + mkdir -p $(DESTDIR)$(bindir) + mkdir -p $(DESTDIR)$(docdir) + $(BININSTALL) -m 755 pgbouncer $(DESTDIR)$(bindir) + $(INSTALL) -m 644 $(srcdir)/etc/pgbouncer.ini $(DESTDIR)$(docdir) + +# create tarfile +tgz: config.mak $(DISTFILES) + rm -rf $(FULL) $(FULL).tgz + mkdir $(FULL) + (for f in $(DISTFILES); do echo $$f; done) | cpio -p $(FULL) + tar czf $(FULL).tgz $(FULL) + rm -rf $(FULL) + +# create debian package +deb: configure + yada rebuild + debuild -uc -us -b + +# clean
object files +clean: + rm -f *~ src/*~ *.o src/*.o lib/*.o pgbouncer core core.* + +# clean configure results +distclean: clean + rm -f config.h config.log config.status config.mak + rm -rf lib autom4te* + +# clean autoconf results +realclean: distclean + rm -f aclocal* config.h.in configure depcomp install-sh missing + rm -f tags + +# generate configure script and config.h.in +boot: distclean + autoreconf -i -f + rm -rf autom4te* config.h.in~ + +# targets can depend on this to force ./configure +config.mak:: + @test -f configure || { \ + echo "Please run 'make boot && ./configure' first.";exit 1;} + @test -f $@ || { echo "Please run ./configure first.";exit 1;} + +# targets can depend on this to force 'make boot' +configure:: + @test -f $@ || { echo "Please run 'make boot' first.";exit 1;} + +# create tags file +tags: $(srcs) $(hdrs) + if test -f ../libevent/event.h; then \ + ctags $(srcs) $(hdrs) ../libevent/*.[ch]; \ + else \ + ctags $(srcs) $(hdrs); \ + fi + +# fixes for macos +SPARSE_MACOS=-D__STDC_VERSION__=199901 -D__LP64__=0 -DSENDFILE=1 \ + -I/usr/lib/gcc/i486-linux-gnu/4.1.2/include +# sparse predefines no compiler/arch identity macros, so define them by hand +SPARCE_FLAGS=-D__LITTLE_ENDIAN__ -D__i386__ -D__GNUC__=3 -D__GNUC_MINOR__=0 \ + -Wno-transparent-union \ + -Wall $(SPARSE_MACOS) $(CPPFLAGS) $(DEFS) + +# run sparse over code +check: config.mak + $(E) " CHECK" $(srcs) + $(Q) sparse $(SPARCE_FLAGS) $(srcs) + diff --git a/README b/README new file mode 100644 index 0000000..9fbdbd0 --- /dev/null +++ b/README @@ -0,0 +1,22 @@ + +PgBouncer +========= + +Lightweight connection pooler for PostgreSQL. + +Docs: http://developer.skype.com/SkypeGarage/DbProjects/PgBouncer +Source: http://pgfoundry.org/projects/pgbouncer + +Building +--------- + +PgBouncer uses libevent for low-level socket handling.
When this is +installed just run: + + $ ./configure --prefix=/usr/local --with-libevent=/prefix + $ make + $ make install + +If the OS does not have libevent available as package, it can be +downloaded from http://monkey.org/~provos/libevent/ + diff --git a/config.mak.in b/config.mak.in new file mode 100644 index 0000000..7eb4677 --- /dev/null +++ b/config.mak.in @@ -0,0 +1,33 @@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ + +DEFS = @DEFS@ +LIBS = @LIBS@ +CC = @CC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CFLAGS = @CFLAGS@ +LDFLAGS = @LDFLAGS@ + +prefix = @prefix@ +exec_prefix = @exec_prefix@ +bindir = @bindir@ +datarootdir = @datarootdir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +docdir = @docdir@ + +top_builddir = @top_builddir@ +srcdir = @srcdir@ +abs_srcdir = @abs_srcdir@ +top_srcdir = @top_srcdir@ +abs_top_srcdir = @abs_top_srcdir@ +builddir = @builddir@ +abs_builddir = @abs_builddir@ +abs_top_builddir = @abs_top_builddir@ + +# autoconf does not want to find 'install' +# if im not using automake... +INSTALL = @INSTALL@ +BININSTALL = @BININSTALL@ + diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..9efbb56 --- /dev/null +++ b/configure.ac @@ -0,0 +1,115 @@ +dnl Process this file with autoconf to produce a configure script. + +AC_INIT(pgbouncer, 1.0) +AC_CONFIG_SRCDIR(src/bouncer.h) +AC_CONFIG_HEADER(config.h) + +dnl Checks for programs. +AC_PROG_CC +AC_PROG_CPP + +dnl Additional gcc tuning +if test x"$GCC" = xyes; then + AC_MSG_CHECKING([for working warning swithces]) + good_CFLAGS="$CFLAGS" + good="-Wall" + flags="-Wextra" + # turn off noise from Wextra + flags="$flags -Wno-unused-parameter -Wno-sign-compare" + flags="$flags -Wno-missing-field-initializers" + # Wextra does not turn those on? 
+ flags="$flags -Wmissing-prototypes -Wpointer-arith -Wendif-labels" + flags="$flags -Wdeclaration-after-statement -Wold-style-definition" + flags="$flags -Wstrict-prototypes" + for f in $flags; do + CFLAGS="$good_CFLAGS $good $f" + AC_COMPILE_IFELSE([void foo(void){}], [good="$good $f"]) + done + CFLAGS="$good_CFLAGS $good" + AC_MSG_RESULT([$good]) +fi + +dnl Checks for header files. +AC_CHECK_HEADERS([crypt.h sys/socket.h sys/ucred.h]) + +dnl Checks for typedefs, structures, and compiler characteristics. +AC_C_INLINE +AC_TYPE_PID_T +AC_TYPE_SIZE_T +AC_TYPE_UINT8_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_SYS_LARGEFILE + +dnl Checks for library functions. +AC_CHECK_FUNCS(strlcpy strlcat) +AC_SEARCH_LIBS(crypt, crypt, [], AC_MSG_ERROR([crypt not found])) + +dnl Find libevent +AC_MSG_CHECKING([for libevent]) +AC_ARG_WITH(libevent, + AC_HELP_STRING([--with-libevent=prefix],[Specify where libevent is installed]), + [ test "$withval" = "no" && AC_MSG_ERROR("cannot work without libevent") + CPPFLAGS="$CPPFLAGS -I$withval/include" + LDFLAGS="$LDFLAGS -L$withval/lib" ]) +LIBS="$LIBS -levent" +AC_LINK_IFELSE([ + #include <sys/types.h> + #include <sys/time.h> + #include <stdio.h> + #include <event.h> + int main(void) { + struct event ev; + event_init(); + event_set(&ev, 1, EV_READ, NULL, NULL); + } ], +[AC_MSG_RESULT([found])], +[AC_MSG_ERROR([not found])]) + +# autoconf does not want to find 'install', if not using automake...
+INSTALL=install + +AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug],[build binary with debugging symbols])) +AC_MSG_CHECKING([whether to build debug binary]) +if test "$enable_debug" = "yes"; then + LDFLAGS="-g $LDFLAGS" + CFLAGS="`echo $CFLAGS | sed -e 's/-O2/-O/g'`" + BININSTALL="$INSTALL" + AC_MSG_RESULT([yes]) +else + if test x"$GCC" = xyes; then + CFLAGS="$CFLAGS -fomit-frame-pointer" + fi + BININSTALL="$INSTALL -s" + AC_MSG_RESULT([no]) +fi +AC_SUBST(INSTALL) +AC_SUBST(BININSTALL) + +AC_ARG_ENABLE(cassert, AC_HELP_STRING([--enable-cassert],[turn on assert checking in code])) +AC_MSG_CHECKING([whether to enable asserts]) +if test "$enable_cassert" = "yes"; then + AC_DEFINE(CASSERT, 1, [Define to enable assert checking]) + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi + +AC_ARG_ENABLE(werror, AC_HELP_STRING([--enable-werror],[add -Werror to CFLAGS])) +AC_MSG_CHECKING([whether to fail on warnings]) +if test "$enable_werror" = "yes"; then + CFLAGS="$CFLAGS -Werror" + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi + +dnl Output findings +AC_OUTPUT([config.mak]) + +dnl If separate build dir, link Makefile over +test -f Makefile || { + echo "Linking Makefile" + ln -s $srcdir/Makefile +} + diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..da9cacc --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +pgbouncer (1.0) unstable; urgency=low + + * Public release. 
+ + -- Marko Kreen Tue, 13 Mar 2007 17:30:02 +0200 diff --git a/debian/packages b/debian/packages new file mode 100644 index 0000000..aca0857 --- /dev/null +++ b/debian/packages @@ -0,0 +1,27 @@ +## debian/packages for pgbouncer + +Source: pgbouncer +Section: contrib/misc +Priority: extra +Maintainer: Marko Kreen +Standards-Version: 3.6.2 +Description: Lightweight connection pooler for PostgreSQL +Copyright: BSD + Copyright 2007 Marko Kreen, Skype Technologies +Build: sh + CPPFLAGS="-I$HOME/src/libevent" \ + LDFLAGS="-L$HOME/src/libevent/.libs" \ + ./configure --prefix=/usr --enable-debug --enable-cassert + make V=1 +Clean: sh + make clean || true +#Build-Depends: libevent-dev + +Package: pgbouncer +Architecture: any +Contains: unstripped +Depends: [] +Description: Lightweight connection pooler for PostgreSQL + . +Install: sh + make install DESTDIR=$ROOT diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..68b19e1 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,12 @@ + +wiki = https://developer.skype.com/SkypeGarage/DbProjects/PgBouncer +web = mkz@shell.pgfoundry.org:/home/pgfoundry.org/groups/pgbouncer/htdocs/ + +all: + +upload: + devupload.sh overview.txt $(wiki) + devupload.sh todo.txt $(wiki)/ToDo + devupload.sh usage.txt $(wiki)/UsageInfo + devupload.sh config.txt $(wiki)/ConfigFile + diff --git a/doc/config.txt b/doc/config.txt new file mode 100644 index 0000000..b842ae4 --- /dev/null +++ b/doc/config.txt @@ -0,0 +1,229 @@ +#pragma section-numbers 2 + += PgBouncer Config = + +[[TableOfContents]] + +Config file is in "ini" format. Section names are between "[" and "]". +Lines starting with ";" or "#" are taken as comments and ignored. The characters +";" and "#" are not recognized when they appear later in the line. + + + +== Section [pgbouncer] == + +=== Generic settings === + +==== logfile ==== +Specifies log file. Logging is done by open/write/close, so it can be safely +rotated, without informing the pooler. + +Default: not set.
+ + +==== pidfile ==== +Specifies pid file. Without a pidfile, daemonization is not allowed. + +Default: not set. + + +==== listen_addr ==== +Specifies IPv4 address, where to listen for TCP connections. Or "*" +meaning "listen on all addresses". When not set, only unix socket +connections are allowed. + +Default: not set. + +==== listen_port ==== +Which port to listen on. Applies to both TCP and Unix sockets. + +Default: 6000 + +==== unix_socket_dir ==== +Specifies location for Unix sockets. Applies to both listening socket +and server connections. If set to empty string, Unix sockets are disabled. + +Default: /tmp + +==== auth_file ==== + +Load user names and passwords from this file. File format used +is same as for PostgreSQL pg_auth/pg_pwd file, so can be pointed +directly to backend file. + +Default: not set. + +==== auth_type ==== +How to authenticate users. + + md5:: + Use MD5-based password check. auth_file may contain either md5-encrypted + or plain-text passwords. + + crypt:: + Use crypt(3) based password check. auth_file must contain plain-text + passwords. + + plain:: + Clear-text password is sent over wire. + + trust:: + No authentication is done. Username must still exist in auth_file. + + any:: + Like `trust` but username given is ignored. Requires that all databases + are configured to log in as specific user. + +Default: md5 + +==== pool_mode ==== +Specifies when server connection is tagged as reusable for other clients. + + session:: + Server is released back to pool after client disconnects. + + transaction:: + Server is released back to pool after transaction finishes. + + statement:: + Server is released back to pool after query finishes. Long transactions + spanning multiple statements are disallowed in this mode. + +Default: `session`. + +==== max_client_conn ==== + +Maximum number of client connections allowed. + +==== default_pool_size ==== + +How many server connections to allow per user/database pair.
+Can be overridden in per-database config. + +=== Console access control === + +==== admin_users ==== +List of users that are allowed to run all commands on console. + +==== stats_users ==== + +List of users that are allowed to run read-only queries on console. +That means all SHOW commands except SHOW FDS. + +=== Connection sanity checks, timeouts === + +==== server_check_delay ==== + +How long to keep a released connection immediately available, without running +sanity-check query on it. If 0 then the query is always run. + +==== server_check_query ==== + +Good variants are `SELECT 1;`, to just see if connection is alive +and `ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT` +to do full reset. + +If empty string, then sanity checking is disabled. + +==== server_lifetime ==== + +Pooler tries to close server connections that have been connected +longer than this. + +==== server_idle_timeout ==== + +If server connection has been idle more than this then there are too many +connections in the pool and this one can be dropped. + +==== server_connect_timeout ==== + +If connection and login won't finish in this time, the connection will +be closed. + +==== server_login_retry ==== + +If login failed, because of failure from connect() or authentication +then pooler waits this much before retrying to connect. + +==== query_timeout ==== + +Queries running longer than that are canceled. This should be used +only with slightly smaller server-side statement_timeout, to apply only +for network problems. + +Default: 0 (disabled) + +==== client_idle_timeout ==== + +Client connections idling longer than that are closed. + +Default: 0 (disabled) + +=== Low-level network settings === + +==== pkt_buf ==== + +Internal buffer size for packets. Affects size of TCP packets sent +and general memory usage. Actual libpq packets can be larger than this +so no need to set it large.
+ +Default: 2048 + +==== tcp_defer_accept ==== + +Details about the following options should be looked up in `man 7 tcp` + +Default: 45 on Linux, otherwise 0 + +==== tcp_socket_buffer ==== + +Default: not set + +==== tcp_keepalive ==== + +Default: not set + +==== tcp_keepcnt ==== +Default: not set + +==== tcp_keepidle ==== +Default: not set +==== tcp_keepintvl ==== +Default: not set + +== Section [databases] == + +This contains key=value pairs where key will be taken as database name and value as +libpq-connstring style list of key=value pairs. As actual libpq is not used, +not all features from libpq can be used (service=, quoting). + +=== dbname === + +Destination database name. + +Default: same as client-side database name. + +=== host === + +IP-address to connect to. + +Default: not set, meaning to use unix-socket. + +=== port === + +Default: 5432 + +=== user, password === + +If user= is set, all connections to destination database will be done +with that user, meaning that there will be only one pool for this database. + +Otherwise pgbouncer tries to log into destination database with client username, +meaning that there will be one pool per user. + +=== client_encoding, datestyle === + +As pgbouncer does not pass client startup packet to server, there is no way of specifying +startup parameters to dest database. These parameters make it possible to set startup +parameters in pgbouncer config. Especially, client_encoding=UNICODE is needed to work +around JDBC driver bug. diff --git a/doc/overview.txt b/doc/overview.txt new file mode 100644 index 0000000..9c04630 --- /dev/null +++ b/doc/overview.txt @@ -0,0 +1,44 @@ + += PgBouncer = + +Lightweight connection pooler for PostgreSQL. + +Downloads, bugtracker, CVS: http://pgfoundry.org/projects/pgbouncer + +== Features == + + * Several levels of brutality when rotating connections: + + Session pooling:: + Most polite method.
When client connects, a server connection + will be assigned to it for the whole duration it stays connected. + When client disconnects, the server connection will be put back + into pool. + + Transaction pooling:: + Server connection is assigned to client only during a transaction. + When PgBouncer notices that transaction is over, the server + will be put back into pool. + + Statement pooling:: + Most aggressive method. The server connection will be put back into + pool immediately after a query completes. Multi-statement + transactions are disallowed in this mode as they would break. + + * Low memory requirements (2k per connection by default). This is due + to the fact that PgBouncer does not need to see full packet at once. + + * It is not tied to one backend server, the destination databases can + reside on different hosts. + + * Supports online reconfiguration for most of the settings. + + * Supports online restart - is able to transfer sockets to new process. + + * Supports protocol V3 only, so backend version must be >= 7.4. + +== Docs == + + * Detailed usage info: ./UsageInfo + * Config file help: ./ConfigFile + * TODO list: ./ToDo diff --git a/doc/todo.txt b/doc/todo.txt new file mode 100644 index 0000000..9e576de --- /dev/null +++ b/doc/todo.txt @@ -0,0 +1,21 @@ += PgBouncer TODO list = + + * -R should detect that no pooler is running and boot normally + * -R should detect if login fails then exit() + * PAUSE ; RESUME ; + + * keep stats about error counts? + * SHUTDOWN cmd should print notice? + * before loading users, disable all existing? + + * log_connects, log_disconnects settings + +== Bugs == + + * Bouncer can get into situation where SUSPEND won't work (stalls), + thus making reboot impossible. + + * Light load with small server_check_delay creates situation where + some clients may never get server connection. Applied a hack to + fix this, needs more analysis.
+ diff --git a/doc/usage.txt b/doc/usage.txt new file mode 100644 index 0000000..960800f --- /dev/null +++ b/doc/usage.txt @@ -0,0 +1,153 @@ +#pragma section-numbers 2 + += PgBouncer usage details = + +[[TableOfContents]] + +== Building == + +PgBouncer uses [http://monkey.org/~provos/libevent/ libevent] +for low-level socket handling. When this is installed just run: + +{{{ +$ ./configure --prefix=/usr/local --with-libevent=/prefix +$ make +$ make install +}}} + +== Command line usage == +{{{ +pgbouncer [-d][-R][-v] config.ini +pgbouncer -V|-h +}}} + +Where switches are: + + -d:: + Run in background. Without it the process will run in foreground. + + -R:: + Do an online restart. That means connecting to running process, + loading open sockets from it and using them. + + -v:: + Increase verbosity. + + -V:: + Show version. + + -h:: + Show short help. + + +== Admin Console == + +There is always an extra database available: "pgbouncer". +When connected to it, it is possible to view and change +pooler settings. + +{{{ +SHOW STATS; +}}} +Shows statistics. + +{{{ +SHOW SERVERS; +SHOW CLIENTS; +SHOW POOLS; +SHOW LISTS; +}}} +Shows internal info. + +{{{ +SHOW USERS; +SHOW DATABASES; +}}} +Shows loaded users and databases. + +{{{ +SHOW FDS; +}}} +Shows list of fds in use. When the connected user has username +"pgbouncer", connects through unix socket and has same UID as running process +the actual fds are passed over connection. This mechanism is used +to do online restart. + +{{{ +PAUSE; +}}} +PgBouncer tries to disconnect from all servers, first waiting for +all queries to complete. The command will not return before all is done. + +{{{ +SUSPEND; +}}} + +All socket buffers are flushed and PgBouncer stops listening for data on them. +The command will not return before all is done. + +{{{ +RESUME; +}}} + +Resume work from previous PAUSE or SUSPEND command. + +{{{ +SHUTDOWN; +}}} +The PgBouncer process will exit.
+ + +== Online restart == + +PgBouncer supports restart without dropping connections. When launched +with switch "-R", it will connect to running PgBouncer process via +unix socket and issue the commands: + +{{{ +SUSPEND; +SHOW FDS; +SHUTDOWN; +}}} +Then it waits until old process shuts down and then starts listening on +acquired sockets. + +== Signals == + + SIGHUP:: + Reload config. + + SIGINT:: + Safe shutdown. + + SIGTERM:: + Immediate shutdown. + +== libevent settings == + +From libevent docs: + +{{{ +It is possible to disable support for epoll, kqueue, devpoll, poll or select +by setting the environment variable EVENT_NOEPOLL, EVENT_NOKQUEUE, EVENT_NODEVPOLL, +EVENT_NOPOLL or EVENT_NOSELECT, respectively. By setting the environment variable +EVENT_SHOW_METHOD, libevent displays the kernel notification method that it uses. +}}} + +== Authentication file format == + +PgBouncer needs its own user database. The users are loaded from +text file that should be in same format as PostgreSQL's pg_auth/pg_pwd +file. + +{{{ +"username1" "password" ... +"username2" "md12342345234" ... +}}} + +There should be at least 2 fields, surrounded by double quotes. First +is username and second either plain-text or md5-hashed password. +PgBouncer ignores rest of the line. + +Such file format allows pointing PgBouncer directly at the PostgreSQL +user file under data directory.
diff --git a/etc/pgbouncer.ini b/etc/pgbouncer.ini new file mode 100644 index 0000000..027ef28 --- /dev/null +++ b/etc/pgbouncer.ini @@ -0,0 +1,142 @@ +;; database name = connect string +[databases] + +; foodb over unix socket +foodb = + +; redirect bardb to bazdb on localhost +bardb = host=127.0.0.1 dbname=bazdb + +; access to dest database will go with single user +forcedb = host=127.0.0.1 port=300 user=baz password=foo client_encoding=UNICODE datestyle=ISO + +;; Configuration section +[pgbouncer] + +;;; +;;; Administrative settings +;;; + +logfile = pgbouncer.log +pidfile = pgbouncer.pid + +;;; +;;; Where to wait for clients +;;; + +; ip address or * which means all ip-s +listen_addr = 127.0.0.1 +listen_port = 6000 +unix_socket_dir = /tmp + +;;; +;;; Authentication settings +;;; + +; any, trust, plain, crypt, md5 +auth_type = trust +#auth_file = 8.0/main/global/pg_auth +auth_file = etc/userlist.txt + +;;; +;;; Users allowed into database 'pgbouncer' +;;; + +; comma-separated list of users, who are allowed to change settings +admin_users = user2, someadmin, otheradmin + +; comma-separated list of users who are just allowed to use SHOW command +stats_users = stats, root + +;;; +;;; Pooler personality questions +;;; + +; When server connection is released back to pool: +; session - after client disconnects +; transaction - after transaction finishes +; statement - after statement finishes +pool_mode = session + +; When taking idle server into use, this query is run first. +; +; Query for session pooling: +; ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT +; Query for statement/transaction pooling: +; SELECT 1 +; Empty query disables the functionality +server_check_query = select 1 + +; If server was used more recently than this many seconds ago, +; skip the check query. If 0, the check query is always run.
+server_check_delay = 10 + +;;; +;;; Connection limits +;;; + +; total number of clients that can connect +max_client_conn = 100 +default_pool_size = 20 + +;;; +;;; Timeouts +;;; + +;; Close server connection if it's been connected longer. +;server_lifetime = 1200 + +;; Close server connection if it's not been used in this time. +;; Allows to clean unnecessary connections from pool after peak. +;server_idle_timeout = 60 + +;; Cancel connection attempt if server takes longer than this to answer. +;server_connect_timeout = 15 + +;; If server login failed (server_connect_timeout or auth failure) +;; then wait this many seconds. +;server_login_retry = 15 + +;; Dangerous. Server connection is closed if query does not return +;; in this time. Should be used to survive network problems, +;; _not_ as statement_timeout. (default: 0) +;query_timeout = 0 + +;; Dangerous. Client connection is closed if no activity in this time. +;; Should be used to survive network problems. (default: 0) +;client_idle_timeout = 0 + + +;;; +;;; Low-level tuning options +;;; + +;; buffer for streaming packets +;pkt_buf = 2048 + +;; networking options, for info: man 7 tcp + +;; linux: notify program about new connection only if there +;; is also data received. (Seconds to wait.) +;; On Linux the default is 45, on other OS'es 0. +;tcp_defer_accept = 0 + +;; In-kernel buffer size (linux default: 4096) +;tcp_socket_buffer = 0 + +;; whether tcp keepalive should be turned on (0/1) +;tcp_keepalive = 0 + +;; following options are linux-specific. +;; they also require tcp_keepalive=1 + +;; count of keepalive packets +;tcp_keepcnt = 0 + +;; how long the connection can be idle, +;; before sending keepalive packets +;tcp_keepidle = 0 + +;; The time between individual keepalive probes.
+;tcp_keepintvl = 0 + diff --git a/etc/small.ini b/etc/small.ini new file mode 100644 index 0000000..7692e9c --- /dev/null +++ b/etc/small.ini @@ -0,0 +1,30 @@ + +[databases] +evtest = host=127.0.0.1 +provider = host=127.0.0.1 +postgres = host=127.0.0.1 +orderdb = host=127.0.0.1 +forcedb = host=127.0.0.1 port=300 user=baz password=foo client_encoding=UNICODE datestyle=ISO +marko = host=127.0.0.1 port=5432 pool_size=5 +orderdb_test = host=192.168.125.155 +test_part = host=127.0.0.1 + +[pgbouncer] +logfile = pgbouncer.log +;pidfile = pgbouncer.pid + +listen_addr = 127.0.0.1 +listen_port = 6000 +unix_socket_dir = /tmp + +; any, trust, plain, crypt, md5 +auth_type = trust +#auth_file = 8.0/main/global/pg_auth +auth_file = etc/userlist.txt + +; session, transaction, statement +pool_mode = session + +max_client_conn = 100 +default_pool_size = 20 + diff --git a/etc/test.ini b/etc/test.ini new file mode 100644 index 0000000..503c31b --- /dev/null +++ b/etc/test.ini @@ -0,0 +1,31 @@ +[databases] +marko = host=127.0.0.1 + +[pgbouncer] +logfile = lib/pgbouncer.log +pidfile = lib/pgbouncer.pid + +#listen_addr = 127.0.0.1 +listen_port = 6000 +unix_socket_dir = /tmp + +; any, trust, plain, crypt, md5 +auth_type = trust +auth_file = etc/test.users + +; When server connection is released back to pool: +; session - after client disconnects +; transaction - after transaction finishes +; statement - after statement finishes +pool_mode = transaction + +server_check_query = select 1 +server_check_delay = 10 +max_client_conn = 2000 +default_pool_size = 80 + +admin_users = plproxy +stats_users = marko + +stats_period = 60 + diff --git a/etc/test.users b/etc/test.users new file mode 100644 index 0000000..5dd5911 --- /dev/null +++ b/etc/test.users @@ -0,0 +1,12 @@ +"admin" "" "" +"backoffice" "" "" +"info" "" "" +"martinp" "md55c06ac8c93212495f8eaf6a7ffd688dd" "" +"plproxy" "md5a704fc5c9a4bf2f745acc6f7a7ec2f2f" "" +"postgres" "md5264abda62970ba635b133f545ce12132" "" +"priitk" 
"md55c08f2e34592ddb13972db7eaadc1232" "" +"replicator" "" "" +"webstore" "" "" +"wypbe" "md57e17e9c6cfde1c1f6f9155071d7d18a8" "" +"wypfe" "md5e3b7c35f688032d97ab066210a33184b" "" +"marko" "funky" diff --git a/etc/userlist.txt b/etc/userlist.txt new file mode 100644 index 0000000..166f46d --- /dev/null +++ b/etc/userlist.txt @@ -0,0 +1,3 @@ +"marko" "asdasd" +"postgres" "asdasd" +"pgbouncer" "fake" diff --git a/src/admin.c b/src/admin.c new file mode 100644 index 0000000..b6fd1a0 --- /dev/null +++ b/src/admin.c @@ -0,0 +1,888 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "bouncer.h" + +#include + +/* regex elements */ +#define WS0 "[ \t\n\r]*" +#define WS1 "[ \t\n\r]+" +#define WORD "([0-9a-z_]+)" +#define STRING "'(([^']*|'')*)'" + +/* possible max + 1 */ +#define MAX_GROUPS 10 + +/* group numbers */ +#define SHOW_ARG 1 +#define SET_KEY 1 +#define SET_VAL 2 +#define SINGLECMD 1 + +/* SHOW */ +static const char cmd_show_rx[] = +"^" WS0 "show" WS1 WORD "?" 
WS0 ";" WS0 "$"; + +/* SET with simple value */ +static const char cmd_set_word_rx[] = +"^" WS0 "set" WS1 WORD WS0 "=" WS0 WORD WS0 ";" WS0 "$"; + +/* SET with quoted value */ +static const char cmd_set_str_rx[] = +"^" WS0 "set" WS1 WORD WS0 "=" WS0 STRING WS0 ";" WS0 "$"; + +/* single word cmd */ +static const char cmd_single_rx[] = +"^" WS0 WORD ";" WS0 "$"; + +/* compiled regexes */ +static regex_t rc_show; +static regex_t rc_set_word; +static regex_t rc_set_str; +static regex_t rc_single; + +static PgPool *admin_pool; +/* Log a formatted error; if admin is non-NULL also send it to that client. Returns the send result, or true when nothing was sent. */ +bool admin_error(PgSocket *admin, const char *fmt, ...) +{ + char str[1024]; + va_list ap; + bool res = true; + + va_start(ap, fmt); + vsnprintf(str, sizeof(str), fmt, ap); + va_end(ap); + + log_error("%s", str); + if (admin) + res = send_pooler_error(admin, true, str); + return res; +} +/* Terminate a result already built in buf: append CommandComplete(desc) and ReadyForQuery, then pass buf to pktbuf_send_queued(). */ +void admin_flush(PgSocket *admin, PktBuf *buf, const char *desc) +{ + pktbuf_write_CommandComplete(buf, desc); + pktbuf_write_ReadyForQuery(buf); + pktbuf_send_queued(buf, admin); +} +/* Reply with only CommandComplete(desc) + ReadyForQuery, built in a 512-byte stack buffer and sent at once. */ +bool admin_ready(PgSocket *admin, const char *desc) +{ + PktBuf buf; + uint8 tmp[512]; + pktbuf_static(&buf, tmp, sizeof(tmp)); + pktbuf_write_CommandComplete(&buf, desc); + pktbuf_write_ReadyForQuery(&buf); + return pktbuf_send_immidiate(&buf, admin); +} + +/* Command: SET key = val; -- refused unless admin->admin_user is set */ +static bool admin_set(PgSocket *admin, const char *key, const char *val) +{ + char tmp[512]; + + if (admin->admin_user) { + if (set_config_param(bouncer_params, key, val, true, admin)) { + snprintf(tmp, sizeof(tmp), "SET %s=%s", key, val); + return admin_ready(admin, tmp); + } else { + return admin_error(admin, "SET failed"); + } + } else + return admin_error(admin, "admin access needed"); +} + +/* send one DataRow describing a socket with sendmsg, optionally attaching the fd itself */ +static bool send_one_fd(PgSocket *admin, + int fd, const char *task, + const char *user, const char *db, + const char *addr, int port, + uint64 ckey, int link) +{ + struct msghdr msg; + struct cmsghdr *cmsg; + int res; + struct iovec iovec; +
uint8 pktbuf[1024]; + uint8 cntbuf[CMSG_SPACE(sizeof(int))]; + + iovec.iov_base = pktbuf; + BUILD_DataRow(res, pktbuf, sizeof(pktbuf), "issssiqi", + fd, task, user, db, addr, port, ckey, link); + if (res < 0) + return false; + iovec.iov_len = res; + + /* the row itself is the only regular payload */ + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iovec; + msg.msg_iovlen = 1; + + /* attach the fd as SCM_RIGHTS data, only for an own_user admin on a unix socket */ + if (admin->addr.is_unix && admin->own_user) { + msg.msg_control = cntbuf; + msg.msg_controllen = sizeof(cntbuf); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + memcpy(CMSG_DATA(cmsg), &fd, sizeof(int)); + msg.msg_controllen = cmsg->cmsg_len; + } + + slog_debug(admin, "sending socket list: fd=%d, len=%d", + fd, (int)msg.msg_controllen); + res = safe_sendmsg(sbuf_socket(&admin->sbuf), &msg, 0); + if (res < 0) { + log_error("send_one_fd: sendmsg error: %s", strerror(errno)); + return false; + } else if (res != iovec.iov_len) { + log_error("send_one_fd: partial sendmsg"); + return false; + } + return true; +} + +/* send the row for one client or server socket (fd, role, user, db, address, cancel key, linked fd) */ +static bool show_one_fd(PgSocket *admin, PgSocket *sk) +{ + PgAddr *addr = &sk->addr; + MBuf tmp; + + mbuf_init(&tmp, sk->cancel_key, 8); + + return send_one_fd(admin, sbuf_socket(&sk->sbuf), + is_server_socket(sk) ? "server" : "client", + sk->auth_user ? sk->auth_user->name : NULL, + sk->pool ? sk->pool->db->name : NULL, + addr->is_unix ? "unix" : inet_ntoa(addr->ip_addr), + addr->port, + mbuf_get_uint64(&tmp), + sk->link ?
sbuf_socket(&sk->link->sbuf) : 0); +} + +/* send "pooler" rows for the listening fds returned by get_pooler_fds(); a zero fd is skipped */ +static bool show_pooler_fds(PgSocket *admin) +{ + int fd_net, fd_unix; + bool res = true; + + get_pooler_fds(&fd_net, &fd_unix); + + if (fd_net) + res = send_one_fd(admin, fd_net, "pooler", NULL, NULL, + cf_listen_addr, cf_listen_port, 0, 0); + if (fd_unix && res) + res = send_one_fd(admin, fd_unix, "pooler", NULL, NULL, + "unix", cf_listen_port, 0, 0); + return res; +} +/* send a row for every socket in list, stopping at the first failed send */ +static bool show_fds_from_list(PgSocket *admin, StatList *list) +{ + List *item; + PgSocket *sk; + bool res = true; + + statlist_for_each(item, list) { + sk = container_of(item, PgSocket, head); + res = show_one_fd(admin, sk); + if (!res) + break; + } + return res; +} + +/* + * Command: SHOW FDS + * + * If privileged connection, send also actual fds + */ +static bool admin_show_fds(PgSocket *admin) +{ + List *item; + PgPool *pool; + bool res; + + /* + * Dangerous to show to everybody: + * - can lock pooler as code flips async option + * - show cancel keys for all users + */ + if (!admin->admin_user) + return admin_error(admin, "admin access needed"); + + /* + * Its very hard to send it reliably over in async manner, + * so turn async off for this resultset.
+ */ + socket_set_nonblocking(sbuf_socket(&admin->sbuf), 0); + + /* + * send resultset + */ + SEND_RowDescription(res, admin, "issssiqi", + "fd", "task", + "user", "database", + "addr", "port", + "cancel", "link"); + if (res) + res = show_pooler_fds(admin); + + if (res) + res = show_fds_from_list(admin, &login_client_list); + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + if (pool->admin) + continue; + res = res && show_fds_from_list(admin, &pool->active_client_list); + res = res && show_fds_from_list(admin, &pool->waiting_client_list); + res = res && show_fds_from_list(admin, &pool->active_server_list); + res = res && show_fds_from_list(admin, &pool->idle_server_list); + res = res && show_fds_from_list(admin, &pool->used_server_list); + res = res && show_fds_from_list(admin, &pool->tested_server_list); + res = res && show_fds_from_list(admin, &pool->new_server_list); + if (!res) + break; + } + if (res) + res = admin_ready(admin, "SHOW"); + + /* turn async back on */ + socket_set_nonblocking(sbuf_socket(&admin->sbuf), 1); + + return res; +} + +/* Command: SHOW DATABASES */ +static bool admin_show_databases(PgSocket *admin) +{ + PgDatabase *db; + List *item; + char *host; + const char *f_user; + PktBuf *buf; + + buf = pktbuf_dynamic(256); + if (!buf) { + admin_error(admin, "no mem"); + return true; + } + + pktbuf_write_RowDescription(buf, "ssissi", + "name", "host", "port", + "database", "force_user", "pool_size"); + statlist_for_each(item, &database_list) { + db = container_of(item, PgDatabase, head); + + if (!db->addr.is_unix) { + host = inet_ntoa(db->addr.ip_addr); + } else + host = NULL; + + f_user = db->forced_user ? 
db->forced_user->name : NULL; + pktbuf_write_DataRow(buf, "ssissi", + db->name, host, db->addr.port, + db->dbname, f_user, + db->pool_size); + } + admin_flush(admin, buf, "SHOW"); + return true; +} + + +/* Command: SHOW LISTS */ +static bool admin_show_lists(PgSocket *admin) +{ + PktBuf *buf = pktbuf_dynamic(256); + if (!buf) { + admin_error(admin, "no mem"); + return true; + } + pktbuf_write_RowDescription(buf, "si", "list", "items"); +#define SENDLIST(name, size) pktbuf_write_DataRow(buf, "si", (name), (size)) + SENDLIST("databases", statlist_count(&database_list)); + SENDLIST("users", statlist_count(&user_list)); + SENDLIST("pools", statlist_count(&pool_list)); + SENDLIST("free_clients", statlist_count(&free_client_list)); + SENDLIST("used_clients", get_active_client_count()); + SENDLIST("login_clients", statlist_count(&login_client_list)); + SENDLIST("free_servers", statlist_count(&free_server_list)); + SENDLIST("used_servers", get_active_server_count()); + admin_flush(admin, buf, "SHOW"); + return true; +} + +/* Command: SHOW USERS */ +static bool admin_show_users(PgSocket *admin) +{ + PgUser *user; + List *item; + PktBuf *buf = pktbuf_dynamic(256); + if (!buf) { + admin_error(admin, "no mem"); + return true; + } + pktbuf_write_RowDescription(buf, "s", "name"); + statlist_for_each(item, &user_list) { + user = container_of(item, PgUser, head); + pktbuf_write_DataRow(buf, "s", user->name); + } + admin_flush(admin, buf, "SHOW"); + return true; +} + +/* Helper for SHOW CLIENTS */ +static void show_client_list(PktBuf *buf, StatList *list, const char *state) +{ + List *item; + PgSocket *client; + const char *addr; + + statlist_for_each(item, list) { + client = container_of(item, PgSocket, head); + addr = client->addr.is_unix ? 
"unix" + : inet_ntoa(client->addr.ip_addr); + + pktbuf_write_DataRow(buf, "ssssiTT", + client->auth_user->name, + client->pool->db->name, + state, addr, client->addr.port, + client->connect_time, + client->request_time); + } +} + +/* Command: SHOW CLIENTS */ +static bool admin_show_clients(PgSocket *admin) +{ + List *item; + PgPool *pool; + PktBuf *buf = pktbuf_dynamic(256); + + if (!buf) { + admin_error(admin, "no mem"); + return true; + } + pktbuf_write_RowDescription(buf, "ssssiTT", + "user", "database", "state", + "addr", "port", "connect_time", "request_time"); + /* todo: age? query stats? */ + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + + show_client_list(buf, &pool->active_client_list, "active"); + show_client_list(buf, &pool->waiting_client_list, "waiting"); + } + + admin_flush(admin, buf, "SHOW"); + return true; +} + +/* Helper for SHOW SERVERS */ +static void show_server_list(PktBuf *buf, StatList *list, const char *state) +{ + List *item; + PgSocket *server; + const char *addr; + + statlist_for_each(item, list) { + server = container_of(item, PgSocket, head); + addr = server->addr.is_unix ? "unix" + : inet_ntoa(server->addr.ip_addr); + + pktbuf_write_DataRow(buf, "ssssiTT", + server->auth_user->name, + server->pool->db->name, + state, addr, server->addr.port, + server->connect_time, + server->request_time + ); + } +} + +/* Command: SHOW SERVERS */ +static bool admin_show_servers(PgSocket *admin) +{ + List *item; + PgPool *pool; + PktBuf *buf; + + buf = pktbuf_dynamic(256); + if (!buf) { + admin_error(admin, "no mem"); + return true; + } + pktbuf_write_RowDescription(buf, "ssssiTT", + "database", "user", "state", + "addr", "port", "connect_time", "request_time"); + /* todo: age? 
query stats */ + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + show_server_list(buf, &pool->active_server_list, "active"); + show_server_list(buf, &pool->idle_server_list, "idle"); + show_server_list(buf, &pool->used_server_list, "used"); + show_server_list(buf, &pool->tested_server_list, "tested"); + } + admin_flush(admin, buf, "SHOW"); + return true; +} + +/* Command: SHOW POOLS */ +static bool admin_show_pools(PgSocket *admin) +{ + List *item; + PgPool *pool; + PktBuf *buf; + + buf = pktbuf_dynamic(256); + if (!buf) { + admin_error(admin, "no mem"); + return true; + } + pktbuf_write_RowDescription(buf, "ssiiiiiii", + "database", "user", + "cl_active", "cl_waiting", + "sv_active", "sv_idle", + "sv_used", "sv_tested", + "sv_login"); + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + pktbuf_write_DataRow(buf, "ssiiiiiii", + pool->db->name, pool->user->name, + statlist_count(&pool->active_client_list), + statlist_count(&pool->waiting_client_list), + statlist_count(&pool->active_server_list), + statlist_count(&pool->idle_server_list), + statlist_count(&pool->used_server_list), + statlist_count(&pool->tested_server_list), + statlist_count(&pool->new_server_list)); + } + admin_flush(admin, buf, "SHOW"); + return true; +} + +/* Command: SHOW CONFIG */ +static bool admin_show_config(PgSocket *admin) +{ + ConfElem *cf; + int i = 0; + PktBuf *buf; + + buf = pktbuf_dynamic(256); + if (!buf) { + admin_error(admin, "no mem"); + return true; + } + + pktbuf_write_RowDescription(buf, "sss", "key", "value", "changeable"); + while (1) { + cf = &bouncer_params[i++]; + if (!cf->name) + break; + + pktbuf_write_DataRow(buf, "sss", + cf->name, conf_to_text(cf), + cf->reloadable ? 
"yes" : "no"); + } + admin_flush(admin, buf, "SHOW"); + return true; +} + +/* Command: RELOAD */ +static bool admin_cmd_reload(PgSocket *admin) +{ + if (!admin->admin_user) + return admin_error(admin, "admin access needed"); + + log_info("RELOAD command issued"); + load_config(true); + return admin_ready(admin, "RELOAD"); +} + +/* Command: SHUTDOWN */ +static bool admin_cmd_shutdown(PgSocket *admin) +{ + if (!admin->admin_user) + return admin_error(admin, "admin access needed"); + + log_info("SHUTDOWN command issued"); + exit(0); + return true; +} + +/* Command: RESUME */ +static bool admin_cmd_resume(PgSocket *admin) +{ + int tmp_mode = cf_pause_mode; + if (!admin->admin_user) + return admin_error(admin, "admin access needed"); + + log_info("RESUME command issued"); + cf_pause_mode = 0; + switch (tmp_mode) { + case 2: + resume_all(); + case 1: + return admin_ready(admin, "RESUME"); + default: + return admin_error(admin, "Pooler is not paused/suspended"); + } +} + +/* Command: SUSPEND */ +static bool admin_cmd_suspend(PgSocket *admin) +{ + if (!admin->admin_user) + return admin_error(admin, "admin access needed"); + + if (cf_pause_mode) + return admin_error(admin, "already suspended/paused"); + + log_info("SUSPEND command issued"); + cf_pause_mode = 2; + admin->wait_for_response = 1; + suspend_pooler(); + + return true; +} + +/* Command: PAUSE */ +static bool admin_cmd_pause(PgSocket *admin) +{ + if (!admin->admin_user) + return admin_error(admin, "admin access needed"); + + if (cf_pause_mode) + return admin_error(admin, "already suspended/paused"); + + log_info("PAUSE command issued"); + cf_pause_mode = 1; + admin->wait_for_response = 1; + + return true; +} + +/* extract substring from regex group */ +static void copy_arg(const char *src, regmatch_t *glist, + int gnum, char *dst, int dstmax) +{ + regmatch_t *g = &glist[gnum]; + unsigned len = g->rm_eo - g->rm_so; + if (len < dstmax) + memcpy(dst, src + g->rm_so, len); + else + len = 0; + dst[len] = 0; +} + +/* 
extract quoted substring from regex group */ +static void copy_arg_unquote(const char *str, regmatch_t *glist, + int gnum, char *dst, int dstmax) +{ + regmatch_t *g = &glist[gnum]; + int len = g->rm_eo - g->rm_so; + const char *src = str + g->rm_so; + const char *end = src + len; + + if (len < dstmax) { + len = 0; + while (src < end) { + if (src[0] == '\'' && src[1] == '\'') { + *dst++ = '\''; + src += 2; + } else + *dst++ = *src++; + } + } + *dst = 0; +} + +static bool admin_show_help(PgSocket *admin) +{ + bool res; + SEND_generic(res, admin, 'N', + "sssss", + "SNOTICE", "C00000", "MConsole usage", + "D\n\tSHOW [HELP|CONFIG|DATABASES|FDS" + "|POOLS|CLIENTS|SERVERS|LISTS|VERSION]\n" + "\tSET key = arg\n" + "\tRELOAD\n" + "\tPAUSE\n" + "\tSUSPEND\n" + "\tRESUME\n" + "\tSHUTDOWN", ""); + if (res) + res = admin_ready(admin, "SHOW"); + return res; +} + +static bool admin_show_version(PgSocket *admin) +{ + bool res; + SEND_generic(res, admin, 'N', + "ssss", "SNOTICE", "C00000", + "MPgBouncer version " PACKAGE_VERSION, ""); + if (res) + res = admin_ready(admin, "SHOW"); + return res; +} + +/* handle user query */ +static bool admin_parse_query(PgSocket *admin, const char *q) +{ + regmatch_t grp[MAX_GROUPS]; + char key[64]; + char val[256]; + bool res = true; + + if (regexec(&rc_show, q, MAX_GROUPS, grp, 0) == 0) { + copy_arg(q, grp, SHOW_ARG, key, sizeof(key)); + if (strcasecmp(key, "help") == 0) { + res = admin_show_help(admin); + } else if (strcasecmp(key, "stats") == 0) { + res = admin_database_stats(admin, &pool_list); + } else if (strcasecmp(key, "config") == 0) { + res = admin_show_config(admin); + } else if (strcasecmp(key, "databases") == 0) { + res = admin_show_databases(admin); + } else if (strcasecmp(key, "users") == 0) { + res = admin_show_users(admin); + } else if (strcasecmp(key, "pools") == 0) { + res = admin_show_pools(admin); + } else if (strcasecmp(key, "clients") == 0) { + res = admin_show_clients(admin); + } else if (strcasecmp(key, "servers") == 0) { 
+ res = admin_show_servers(admin); + } else if (strcasecmp(key, "lists") == 0) { + res = admin_show_lists(admin); + } else if (strcasecmp(key, "fds") == 0) { + res = admin_show_fds(admin); + } else if (strcasecmp(key, "version") == 0) { + res = admin_show_version(admin); + } else + res = admin_error(admin, "bad SHOW arg, use SHOW HELP"); + } else if (regexec(&rc_set_str, q, MAX_GROUPS, grp, 0) == 0) { + copy_arg(q, grp, SET_KEY, key, sizeof(key)); + copy_arg_unquote(q, grp, SET_VAL, val, sizeof(val)); + if (!key[0] || !val[0]) { + res = admin_error(admin, "bad arguments"); + } else + res = admin_set(admin, key, val); + } else if (regexec(&rc_set_word, q, MAX_GROUPS, grp, 0) == 0) { + copy_arg(q, grp, SET_KEY, key, sizeof(key)); + copy_arg(q, grp, SET_VAL, val, sizeof(val)); + if (!key[0] || !val[0]) { + res = admin_error(admin, "bad arguments"); + } else + res = admin_set(admin, key, val); + } else if (regexec(&rc_single, q, MAX_GROUPS, grp, 0) == 0) { + copy_arg(q, grp, SINGLECMD, key, sizeof(key)); + if (strcasecmp(key, "SHUTDOWN") == 0) + res = admin_cmd_shutdown(admin); + else if (strcasecmp(key, "SUSPEND") == 0) + res = admin_cmd_suspend(admin); + else if (strcasecmp(key, "PAUSE") == 0) + res = admin_cmd_pause(admin); + else if (strcasecmp(key, "RESUME") == 0) + res = admin_cmd_resume(admin); + else if (strcasecmp(key, "RELOAD") == 0) + res = admin_cmd_reload(admin); + else + res = admin_error(admin, "unknown command: %s", q); + } else + res = admin_error(admin, "unknown cmd: %s", q); + + if (!res) + disconnect_client(admin, true, "failure"); + return res; +} + +/* handle packets */ +bool admin_handle_client(PgSocket *admin, MBuf *pkt, int pkt_type, int pkt_len) +{ + const char *q; + bool res; + + /* dont tolerate partial packets */ + if (mbuf_avail(pkt) < pkt_len - 5) { + disconnect_client(admin, true, "incomplete pkt"); + return false; + } + + switch (pkt_type) { + case 'Q': + q = mbuf_get_string(pkt); + if (!q) { + disconnect_client(admin, true, "incomplete 
query"); + return false; + } + log_debug("got admin query: %s", q); + res = admin_parse_query(admin, q); + if (res) + sbuf_prepare_skip(&admin->sbuf, pkt_len); + return res; + case 'X': + disconnect_client(admin, false, "close req"); + break; + default: + admin_error(admin, "unsupported pkt type: %d", pkt_type); + disconnect_client(admin, true, "bad pkt"); + break; + } + return false; +} + +/** + * Client is unauthenticated, look if it wants to connect + * to special "pgbouncer" user. + */ +bool admin_pre_login(PgSocket *client) +{ + uid_t peer_uid = 0; + bool res; + const char *username = client->auth_user->name; + + client->admin_user = 0; + client->own_user = 0; + + /* tag same uid as special */ + if (client->addr.is_unix) { + res = get_unix_peer_uid(sbuf_socket(&client->sbuf), &peer_uid); + if (res && peer_uid == getuid() + && strcmp("pgbouncer", username) == 0) + { + client->own_user = 1; + client->admin_user = 1; + slog_info(client, "pgbouncer access from unix socket"); + return true; + } + } + + if (strlist_contains(cf_admin_users, username)) { + client->admin_user = 1; + return true; + } else if (strlist_contains(cf_stats_users, username)) { + return true; + } + disconnect_client(client, true, "not allowed"); + return false; +} + +/* init special database and query parsing */ +void admin_setup(void) +{ + PgDatabase *db; + PgPool *pool; + PgUser *user; + PktBuf msg; + int res; + + /* fake database */ + db = add_database("pgbouncer"); + if (!db) + fatal("no mem for admin database"); + + db->addr.port = cf_listen_port; + db->addr.is_unix = 1; + db->pool_size = 2; + force_user(db, "pgbouncer", ""); + + /* fake pool, tag the it as special */ + pool = get_pool(db, db->forced_user); + if (!pool) + fatal("cannot create admin pool?"); + pool->admin = 1; + admin_pool = pool; + + /* fake user, with disabled psw */ + user = add_user("pgbouncer", ""); + if (!user) + fatal("cannot create admin user?"); + create_auth_cache(); + + /* prepare welcome */ + 
pktbuf_static(&msg, db->welcome_msg, sizeof(db->welcome_msg)); + pktbuf_write_AuthenticationOk(&msg); + pktbuf_write_ParameterStatus(&msg, "server_version", "8.0/bouncer"); + pktbuf_write_ParameterStatus(&msg, "client_encoding", "UNICODE"); + pktbuf_write_ParameterStatus(&msg, "server_encoding", "UNICODE"); + pktbuf_write_ParameterStatus(&msg, "is_superuser", "on"); + + db->welcome_msg_len = pktbuf_written(&msg); + db->welcome_msg_ready = 1; + + pktbuf_static(&msg, db->startup_params, sizeof(db->startup_params)); + pktbuf_put_string(&msg, "database"); + db->dbname = (char *)db->startup_params + pktbuf_written(&msg); + pktbuf_put_string(&msg, "pgbouncer"); + db->startup_params_len = pktbuf_written(&msg); + + /* initialize regexes */ + res = regcomp(&rc_show, cmd_show_rx, REG_EXTENDED | REG_ICASE); + if (res != 0) + fatal("cmd show regex compilation error"); + res = regcomp(&rc_set_word, cmd_set_word_rx, REG_EXTENDED | REG_ICASE); + if (res != 0) + fatal("set/word regex compilation error"); + res = regcomp(&rc_set_str, cmd_set_str_rx, REG_EXTENDED | REG_ICASE); + if (res != 0) + fatal("set/str regex compilation error"); + res = regcomp(&rc_single, cmd_single_rx, REG_EXTENDED | REG_ICASE); + if (res != 0) + fatal("singleword regex compilation error"); +} + +void admin_pause_done(void) +{ + List *item, *tmp; + PgSocket *admin; + + statlist_for_each_safe(item, &admin_pool->active_client_list, tmp) { + admin = container_of(item, PgSocket, head); + if (!admin->wait_for_response) + continue; + + switch (cf_pause_mode) { + case 1: + admin_ready(admin, "PAUSE"); + break; + case 2: + admin_ready(admin, "SUSPEND"); + break; + default: + fatal("admin_pause_done: bad state"); + } + admin->wait_for_response = 0; + } + + if (statlist_empty(&admin_pool->active_client_list) + && cf_pause_mode == 2) + { + log_info("Admin disappeared when suspended, doing RESUME"); + cf_pause_mode = 0; + resume_all(); + } +} + diff --git a/src/admin.h b/src/admin.h new file mode 100644 index 
0000000..dcfd14c --- /dev/null +++ b/src/admin.h @@ -0,0 +1,24 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +bool admin_handle_client(PgSocket *client, MBuf *pkt, int pkt_type, int pkt_len); +bool admin_pre_login(PgSocket *client); +void admin_setup(void); +bool admin_error(PgSocket *console, const char *fmt, ...); +void admin_pause_done(void); +void admin_flush(PgSocket *admin, PktBuf *buf, const char *desc); +bool admin_ready(PgSocket *admin, const char *desc); diff --git a/src/bouncer.h b/src/bouncer.h new file mode 100644 index 0000000..b0af276 --- /dev/null +++ b/src/bouncer.h @@ -0,0 +1,294 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * core structures + */ + +#include "system.h" + +#include + +/* each state corresponts to a list */ +enum SocketState { + CL_FREE, /* free_client_list */ + CL_LOGIN, /* login_client_list */ + CL_WAITING, /* pool->waiting_client_list */ + CL_ACTIVE, /* pool->active_client_list */ + CL_CANCEL, /* pool->cancel_req_list */ + + SV_FREE, /* free_server_list */ + SV_LOGIN, /* pool->new_server_list */ + SV_IDLE, /* pool->idle_server_list */ + SV_ACTIVE, /* pool->active_server_list */ + SV_USED, /* pool->used_server_list */ + SV_TESTED /* pool->tested_server_list */ +}; + +#define is_server_socket(sk) ((sk)->state >= SV_FREE) + + +typedef struct PgSocket PgSocket; +typedef struct PgUser PgUser; +typedef struct PgDatabase PgDatabase; +typedef struct PgPool PgPool; +typedef struct PgStats PgStats; +typedef struct PgAddr PgAddr; +typedef enum SocketState SocketState; + +#include "util.h" +#include "list.h" +#include "mbuf.h" +#include "sbuf.h" +#include "pktbuf.h" + +#include "admin.h" +#include "loader.h" +#include "client.h" +#include "server.h" +#include "pooler.h" +#include "proto.h" +#include "objects.h" +#include "stats.h" +#include "takeover.h" +#include "janitor.h" + +/* to avoid allocations will use static buffers */ +#define MAX_DBNAME 64 +#define MAX_USERNAME 64 +#define MAX_PASSWORD 64 + +/* auth modes, should match PG's */ +#define AUTH_ANY -1 /* same as trust but without username check */ +#define AUTH_TRUST 0 +#define AUTH_PLAIN 3 +#define AUTH_CRYPT 4 +#define AUTH_MD5 5 +#define AUTH_CREDS 6 + +/* type codes for weird pkts */ +#define PKT_STARTUP 0x30000 +#define PKT_SSLREQ 80877103 +#define PKT_CANCEL 80877102 + 
+#define POOL_SESSION 0 +#define POOL_TX 1 +#define POOL_STMT 2 + +struct PgAddr { + struct in_addr ip_addr; + unsigned short port; + unsigned is_unix:1; +}; + +struct PgStats { + uint64 request_count; + uint64 server_bytes; + uint64 client_bytes; + usec_t query_time; /* total req time in us */ +}; + +/* contains connections for one db/user combo */ +struct PgPool { + List head; /* all pools */ + List map_head; /* pools for specific client/db */ + + /* pool contains connection into 'db' under 'user' */ + PgDatabase * db; + PgUser * user; + + /* waiting events logged in clients */ + StatList active_client_list; + /* client waits for a server to be available */ + StatList waiting_client_list; + /* closed client connections with server key */ + StatList cancel_req_list; + + /* servers linked with clients */ + StatList active_server_list; + /* servers ready to be linked with clients */ + StatList idle_server_list; + /* server just unlinked from clients */ + StatList used_server_list; + /* server in testing process */ + StatList tested_server_list; + /* servers in login phase */ + StatList new_server_list; + + /* stats */ + PgStats stats; + PgStats newer_stats; + PgStats older_stats; + + /* if last connect failed, there should be delay before next */ + usec_t last_connect_time; + unsigned last_connect_failed:1; + unsigned admin:1; +}; + +#define pool_server_count(pool) ( \ + statlist_count(&(pool)->active_server_list) + \ + statlist_count(&(pool)->idle_server_list) + \ + statlist_count(&(pool)->new_server_list) + \ + statlist_count(&(pool)->tested_server_list) + \ + statlist_count(&(pool)->used_server_list)) + +#define pool_client_count(pool) ( \ + statlist_count(&(pool)->active_client_list) + \ + statlist_count(&(pool)->waiting_client_list)) + +struct PgUser { + List head; + List pool_list; + char name[MAX_USERNAME]; + char passwd[MAX_PASSWORD]; +}; + +struct PgDatabase { + List head; + char name[MAX_DBNAME]; + + /* database info to be sent to client */ + uint8 
welcome_msg[512]; + unsigned welcome_msg_len; + unsigned welcome_msg_ready:1; + + /* key/val pairs (without user) for startup msg to be sent to server */ + uint8 startup_params[256]; + unsigned startup_params_len; + + /* if not NULL, the user/psw is forced */ + PgUser * forced_user; + + /* address prepared for connect() */ + PgAddr addr; + + /* max server connections in one pool */ + int pool_size; + + /* info fields, pointer to inside startup_msg */ + const char * dbname; +}; + +struct PgSocket { + List head; /* list header */ + PgSocket * link; /* the dest of packets */ + PgPool * pool; /* parent pool, if NULL not yet assigned */ + + SocketState state; + + unsigned wait_for_welcome:1; /* no server yet in pool */ + unsigned ready:1; /* server accepts new query */ + unsigned flush_req:1; /* client requested flush */ + unsigned admin_user:1; + unsigned own_user:1; /* is console client with same uid */ + + /* if the socket is suspended */ + unsigned suspended:1; + + /* admin conn, waits for completion of PAUSE/SUSPEND cmd */ + unsigned wait_for_response:1; + /* this (server0 socket must be closed ASAP */ + unsigned close_needed:1; + + usec_t connect_time; /* when connection was made */ + usec_t request_time; /* last activity time */ + usec_t query_start; /* query start moment */ + + char salt[4]; + uint8 cancel_key[8]; + PgUser * auth_user; + PgAddr addr; + + SBuf sbuf; /* stream buffer, must be last */ +}; + +/* where to store old fd info during SHOW FDS result processing */ +#define tmp_sk_oldfd request_time +#define tmp_sk_linkfd query_start +/* takeover_clean_socket() needs to clean those up */ + +/* main.c */ +extern int cf_verbose; +extern int cf_daemon; + +extern char *cf_unix_socket_dir; +extern char *cf_listen_addr; +extern int cf_listen_port; + +extern int cf_pool_mode; +extern int cf_max_client_conn; +extern int cf_default_pool_size; + +extern usec_t cf_server_lifetime; +extern usec_t cf_server_idle_timeout; +extern char * cf_server_check_query; +extern 
usec_t cf_server_check_delay; +extern usec_t cf_server_connect_timeout; +extern usec_t cf_server_login_retry; +extern usec_t cf_query_timeout; +extern usec_t cf_client_idle_timeout; + +extern int cf_auth_type; +extern char *cf_auth_file; + +extern char *cf_logfile; +extern char *cf_pidfile; + +extern char *cf_admin_users; +extern char *cf_stats_users; +extern int cf_stats_period; + +extern int cf_pause_mode; +extern int cf_shutdown; +extern int cf_reboot; + +extern int cf_sbuf_len; +extern int cf_tcp_keepalive; +extern int cf_tcp_keepcnt; +extern int cf_tcp_keepidle; +extern int cf_tcp_keepintvl; +extern int cf_tcp_socket_buffer; +extern int cf_tcp_defer_accept; + +extern ConfElem bouncer_params[]; + + +static inline PgSocket * +pop_socket(StatList *slist) +{ + List *item = statlist_pop(slist); + if (item == NULL) + return NULL; + return container_of(item, PgSocket, head); +} + +static inline PgSocket * +first_socket(StatList *slist) +{ + if (statlist_empty(slist)) { + log_debug("first_socket: statlist_empty"); + return NULL; + } + log_debug("first_socket: next=%p", slist->head.next); + return container_of(slist->head.next, PgSocket, head); +} + +void load_config(bool reload); + + diff --git a/src/client.c b/src/client.c new file mode 100644 index 0000000..ee55c6c --- /dev/null +++ b/src/client.c @@ -0,0 +1,381 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Client connection handling + */ + +#include "bouncer.h" + +static bool check_client_passwd(PgSocket *client, const char *passwd) +{ + char md5[MD5_PASSWD_LEN + 1]; + const char *correct; + PgUser *user = client->auth_user; + + /* disallow empty passwords */ + if (!*passwd || !*user->passwd) + return false; + + switch (cf_auth_type) { + case AUTH_PLAIN: + return strcmp(user->passwd, passwd) == 0; + case AUTH_CRYPT: + correct = pg_crypt(user->passwd, (char *)client->salt); + return strcmp(correct, passwd) == 0; + case AUTH_MD5: + if (strlen(passwd) != MD5_PASSWD_LEN) + return false; + if (!isMD5(user->passwd)) + pg_md5_encrypt(user->passwd, user->name, strlen(user->name), user->passwd); + pg_md5_encrypt(user->passwd + 3, client->salt, 4, md5); + return strcmp(md5, passwd) == 0; + } + return false; +} + +bool +set_pool(PgSocket *client, const char *dbname, const char *username) +{ + PgDatabase *db; + PgUser *user; + + /* find database */ + db = find_database(dbname); + if (!db) { + disconnect_client(client, true, "No such database"); + return false; + } + + /* find user */ + if (cf_auth_type == AUTH_ANY) { + /* ignore requested user */ + user = NULL; + + if (db->forced_user == NULL) { + disconnect_client(client, true, "bouncer config error"); + log_error("auth_type=any requires forced user"); + return false; + } + client->auth_user = db->forced_user; + } else { + /* the user clients wants to log in as */ + user = find_user(username); + if (!user) { + disconnect_client(client, true, "No such user"); + return false; + } + client->auth_user = user; + } + + /* pool user may be forced */ + if (db->forced_user) + user = 
db->forced_user; + client->pool = get_pool(db, user); + if (!client->pool) { + disconnect_client(client, true, "no mem for pool"); + return false; + } + + return true; +} + +static bool decide_startup_pool(PgSocket *client, MBuf *pkt) +{ + const char *username = NULL, *dbname = NULL; + const char *key, *val; + + while (1) { + key = mbuf_get_string(pkt); + if (!key || *key == 0) + break; + val = mbuf_get_string(pkt); + if (!val) + break; + + if (strcmp(key, "database") == 0) + dbname = val; + else if (strcmp(key, "user") == 0) + username = val; + } + if (!username) { + disconnect_client(client, true, "No username supplied"); + return false; + } + if (!dbname) { + disconnect_client(client, true, "No database supplied"); + return false; + } + slog_debug(client, "login request: db=%s user=%s", dbname, username); + + /* check if limit allows, dont limit admin db + nb: new incoming conn will be attached to PgSocket, thus + get_active_client_count() counts it */ + if (get_active_client_count() > cf_max_client_conn) { + if (strcmp(dbname, "pgbouncer") != 0) { + disconnect_client(client, true, "no more conns allowed"); + return false; + } + } + return set_pool(client, dbname, username); +} + +static const char valid_crypt_salt[] = +"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +static bool send_client_authreq(PgSocket *client) +{ + uint8 saltlen = 0; + int res; + int auth = cf_auth_type; + + if (auth == AUTH_CRYPT) { + saltlen = 2; + get_random_bytes((void*)client->salt, saltlen); + client->salt[0] = valid_crypt_salt[client->salt[0] & 0x3f]; + client->salt[1] = valid_crypt_salt[client->salt[1] & 0x3f]; + client->salt[2] = 0; + } else if (cf_auth_type == AUTH_MD5) { + saltlen = 4; + get_random_bytes((void*)client->salt, saltlen); + } else if (auth == AUTH_ANY) + auth = AUTH_TRUST; + + SEND_generic(res, client, 'R', "ib", auth, client->salt, saltlen); + return res; +} + +/* decide on packets of client in login phase */ +static bool 
handle_client_startup(PgSocket *client, MBuf *pkt) +{ + unsigned pkt_type; + unsigned pkt_len; + const char *passwd; + + SBuf *sbuf = &client->sbuf; + + /* dont tolerate partial packets */ + if (!get_header(pkt, &pkt_type, &pkt_len)) { + disconnect_client(client, true, "client sent bad pkt header"); + return false; + } + + if (client->wait_for_welcome) { + if (finish_client_login(client)) { + /* the packet was already parsed */ + sbuf_prepare_skip(sbuf, pkt_len); + return true; + } else + return false; + } + + slog_noise(client, "pkt='%c' len=%d", + pkt_type < 256 ? pkt_type : '?', pkt_len); + + switch (pkt_type) { + case PKT_SSLREQ: + log_noise("C: req SSL"); + log_noise("P: nak"); + sbuf_answer(&client->sbuf, "N", 1); + break; + case PKT_STARTUP: + if (mbuf_avail(pkt) < pkt_len - 8) { + disconnect_client(client, true, "client sent partial pkt in startup"); + return false; + } + if (client->pool) { + disconnect_client(client, true, "client re-sent startup pkt"); + return false; + } + + if (!decide_startup_pool(client, pkt)) + return false; + + if (client->pool->admin) { + if (!admin_pre_login(client)) + return false; + } + + if (cf_auth_type <= AUTH_TRUST || client->own_user) { + if (!finish_client_login(client)) + return false; + } else { + send_client_authreq(client); + } + break; + case 'p': /* PasswordMessage */ + if (mbuf_avail(pkt) < pkt_len - 5) { + disconnect_client(client, true, "client sent partial pkt in startup"); + return false; + } + + /* havent requested it */ + if (cf_auth_type <= AUTH_TRUST) { + disconnect_client(client, true, "unreqested passwd pkt"); + return false; + } + + passwd = mbuf_get_string(pkt); + if (passwd && check_client_passwd(client, passwd)) { + if (!finish_client_login(client)) + return false; + } else { + disconnect_client(client, true, "Login failed"); + return false; + } + break; + case PKT_CANCEL: + if (mbuf_avail(pkt) == 8) { + const uint8 *key = mbuf_get_bytes(pkt, 8); + memcpy(client->cancel_key, key, 8); + 
accept_cancel_request(client); + } else + disconnect_client(client, false, "bad cancel request"); + return false; + default: + disconnect_client(client, false, "bad pkt"); + return false; + } + sbuf_prepare_skip(sbuf, pkt_len); + client->request_time = get_cached_time(); + return true; +} + +/* decide on packets of logged-in client */ +static bool handle_client_work(PgSocket *client, MBuf *pkt) +{ + unsigned pkt_type; + unsigned pkt_len; + bool flush = 0; + SBuf *sbuf = &client->sbuf; + + if (!get_header(pkt, &pkt_type, &pkt_len)) { + disconnect_client(client, true, "bad pkt header"); + return false; + } + slog_noise(client, "pkt='%c' len=%d", pkt_type, pkt_len); + + switch (pkt_type) { + + /* request immidiate response from server */ + case 'H': /* Flush */ + client->flush_req = 1; + case 'S': /* Sync */ + /* sync is followed by ReadyForQuery */ + + /* one-packet queries */ + case 'Q': /* Query */ + case 'F': /* FunctionCall */ + + /* copy end markers */ + case 'c': /* CopyDone(F/B) */ + case 'f': /* CopyFail(F/B) */ + + /* above packets should be sent ASAP */ + flush = 1; + + /* + * extended protocol allows server (and thus pooler) + * to buffer packets until sync or flush is sent by client + */ + case 'P': /* Parse */ + case 'E': /* Execute */ + case 'C': /* Close */ + case 'B': /* Bind */ + case 'D': /* Describe */ + case 'd': /* CopyData(F/B) */ + + /* update stats */ + if (!client->query_start) { + client->pool->stats.request_count++; + client->query_start = get_time_usec(); + } + + if (client->pool->admin) + return admin_handle_client(client, pkt, pkt_type, pkt_len); + + /* aquire server */ + if (!find_server(client)) + return false; + + client->pool->stats.client_bytes += pkt_len; + + /* tag the server as dirty */ + client->link->ready = 0; + + /* forward the packet */ + sbuf_prepare_send(sbuf, &client->link->sbuf, pkt_len, flush); + break; + + /* client wants to go away */ + default: + slog_error(client, "unknown pkt from client: %d/0x%x", pkt_type, 
pkt_type); + disconnect_client(client, true, "unknown pkt"); + return false; + case 'X': /* Terminate */ + disconnect_client(client, false, "client close request"); + return false; + } + return true; +} + +/* callback from SBuf */ +bool client_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg) +{ + bool res = false; + PgSocket *client = arg; + + Assert(!is_server_socket(client)); + Assert(client->state != SV_FREE); + + switch (evtype) { + case SBUF_EV_CONNECT_OK: + case SBUF_EV_CONNECT_FAILED: + /* ^ those should not happen */ + case SBUF_EV_RECV_FAILED: + disconnect_client(client, false, "client unexpected eof"); + break; + case SBUF_EV_SEND_FAILED: + disconnect_server(client->link, false, "Server connection closed"); + break; + case SBUF_EV_READ: + if (mbuf_avail(pkt) < 5) { + log_noise("C: got partial header, trying to wait a bit"); + return false; + } + + client->request_time = get_cached_time(); + switch (client->state) { + case CL_LOGIN: + res = handle_client_startup(client, pkt); + break; + case CL_ACTIVE: + if (client->wait_for_welcome) + res = handle_client_startup(client, pkt); + else + res = handle_client_work(client, pkt); + break; + case CL_WAITING: + fatal("why waiting client in client_proto()"); + default: + fatal("bad client state: %d", client->state); + } + } + return res; +} + diff --git a/src/client.h b/src/client.h new file mode 100644 index 0000000..136a1a0 --- /dev/null +++ b/src/client.h @@ -0,0 +1,22 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +bool client_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg); +bool set_pool(PgSocket *client, const char *dbname, const char *username); + + diff --git a/src/janitor.c b/src/janitor.c new file mode 100644 index 0000000..9d2d554 --- /dev/null +++ b/src/janitor.c @@ -0,0 +1,445 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Periodic maintenance. 
+ */ + +#include "bouncer.h" + +static struct timeval full_maint_period = {0, USEC / 3}; +static struct event full_maint_ev; + +/* close all sockets in server list */ +static void close_server_list(StatList *sk_list, const char *reason) +{ + List *item, *tmp; + PgSocket *server; + + statlist_for_each_safe(item, sk_list, tmp) { + server = container_of(item, PgSocket, head); + disconnect_server(server, true, reason); + } +} + +/* suspend all sockets in socket list */ +static int suspend_socket_list(StatList *list) +{ + List *item; + PgSocket *sk; + int active = 0; + + statlist_for_each(item, list) { + sk = container_of(item, PgSocket, head); + if (!sk->suspended) { + if (sbuf_empty(&sk->sbuf)) { + sbuf_pause(&sk->sbuf); + sk->suspended = 1; + } else + active++; + } + } + return active; +} + +/* resume all suspended sockets in socket list */ +static void resume_socket_list(StatList *list) +{ + List *item, *tmp; + PgSocket *sk; + + statlist_for_each_safe(item, list, tmp) { + sk = container_of(item, PgSocket, head); + if (sk->suspended) { + sk->suspended = 0; + sbuf_continue(&sk->sbuf); + } + } +} + +/* resume all suspended sockets in all pools */ +static void resume_sockets(void) +{ + List *item; + PgPool *pool; + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + if (pool->admin) + continue; + resume_socket_list(&pool->active_client_list); + resume_socket_list(&pool->active_server_list); + resume_socket_list(&pool->idle_server_list); + resume_socket_list(&pool->used_server_list); + } +} + +/* resume pools and listen sockets */ +void resume_all(void) +{ + resume_sockets(); + resume_pooler(); +} + +/* + * send test/reset query to server if needed + */ +static void launch_recheck(PgPool *pool) +{ + const char *q = cf_server_check_query; + bool need_check = true; + PgSocket *server; + bool res = true; + + server = first_socket(&pool->used_server_list); + + /* is the check needed? 
*/ + if (q == NULL || q[0] == 0) + need_check = false; + else if (cf_server_check_delay > 0) { + usec_t now = get_cached_time(); + if (now - server->request_time < cf_server_check_delay) + need_check = false; + } + + if (need_check) { + /* send test query, wait for result */ + change_server_state(server, SV_TESTED); + SEND_generic(res, server, 'Q', "s", q); + if (!res) + disconnect_server(server, false, "test query failed"); + } else + /* make immidiately available */ + change_server_state(server, SV_IDLE); +} + +/* + * make servers available + */ +static void per_loop_activate(PgPool *pool) +{ + List *item, *tmp; + PgSocket *client; + + /* see if any server have been freed */ + statlist_for_each_safe(item, &pool->waiting_client_list, tmp) { + client = container_of(item, PgSocket, head); + if (!statlist_empty(&pool->idle_server_list)) { + + /* db not fully initialized after reboot */ + if (client->wait_for_welcome && !pool->db->welcome_msg_ready) { + launch_new_connection(pool); + continue; + } + + /* there is a ready server already */ + activate_client(client); + } else if (!statlist_empty(&pool->tested_server_list)) { + /* some connections are in testing process */ + + /* not enough connections? (X) */ + launch_new_connection(pool); + break; + } else if (!statlist_empty(&pool->used_server_list)) { + /* ask for more connections to be tested */ + launch_recheck(pool); + + /* not enough connections? (X) */ + launch_new_connection(pool); + break; + } else { + /* not enough connections */ + launch_new_connection(pool); + break; + } + } +} +/* + * (X) - theres some problem in light load with small server_check_timeout + * where waiting connection wont ever get server connection. 
+ */ + +/* + * pause active clients + */ +static int per_loop_pause(PgPool *pool) +{ + int active = 0; + + if (pool->admin) + return 0; + + close_server_list(&pool->idle_server_list, "pause mode"); + close_server_list(&pool->used_server_list, "pause mode"); + close_server_list(&pool->new_server_list, "pause mode"); + + active += statlist_count(&pool->active_server_list); + active += statlist_count(&pool->tested_server_list); + + return active; +} + +/* + * suspend active clients and servers + */ +static int per_loop_suspend(PgPool *pool) +{ + int active = 0; + + if (pool->admin) + return 0; + + active += suspend_socket_list(&pool->active_client_list); + + if (!statlist_empty(&pool->waiting_client_list)) { + active += statlist_count(&pool->waiting_client_list); + per_loop_activate(pool); + } + + if (!active) { + active += suspend_socket_list(&pool->active_server_list); + active += suspend_socket_list(&pool->idle_server_list); + active += statlist_count(&pool->tested_server_list); + + /* as all clients are done, no need for them */ + close_server_list(&pool->used_server_list, "close unsafe fds on suspend"); + } + + return active; +} + +/* + * this function is called for each event loop. 
+ */ +void per_loop_object_maint(void) +{ + List *item; + PgPool *pool; + int active = 0; + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + if (pool->admin) + continue; + switch (cf_pause_mode) { + case 0: + per_loop_activate(pool); + break; + case 1: + active += per_loop_pause(pool); + break; + case 2: + active += per_loop_suspend(pool); + break; + } + } + + switch (cf_pause_mode) { + case 2: + active += statlist_count(&login_client_list); + case 1: + if (!active) + admin_pause_done(); + default: + break; + } +} + +/* maintaing clients in pool */ +static void pool_client_maint(PgPool *pool) +{ + List *item, *tmp; + usec_t now = get_cached_time(); + PgSocket *client; + usec_t age; + + /* force client_idle_timeout */ + if (cf_client_idle_timeout > 0) { + statlist_for_each_safe(item, &pool->active_client_list, tmp) { + client = container_of(item, PgSocket, head); + Assert(client->state == CL_ACTIVE); + if (client->link) + continue; + if (now - client->request_time > cf_client_idle_timeout) + disconnect_client(client, true, "idle_timeout"); + } + } + + /* force client_query_timeout */ + if (cf_query_timeout > 0) { + statlist_for_each_safe(item, &pool->waiting_client_list, tmp) { + client = container_of(item, PgSocket, head); + Assert(client->state == CL_WAITING); + if (client->query_start == 0) { + age = now - client->request_time; + log_warning("query_start==0"); + } else + age = now - client->query_start; + if (age > cf_query_timeout) + disconnect_client(client, true, "query_timeout"); + } + } +} + +static void check_unused_servers(StatList *slist, usec_t now, bool idle_test) +{ + List *item, *tmp; + usec_t idle, age; + PgSocket *server; + + /* disconnect idle servers if needed */ + statlist_for_each_safe(item, slist, tmp) { + server = container_of(item, PgSocket, head); + + age = now - server->connect_time; + idle = now - server->request_time; + + if (server->close_needed) + disconnect_server(server, true, "db conf changed"); + 
else if (cf_server_idle_timeout > 0 && idle > cf_server_idle_timeout) + disconnect_server(server, true, "server idle timeout"); + else if (cf_server_lifetime > 0 && age > cf_server_lifetime) + disconnect_server(server, true, "server lifetime over"); + else if (cf_pause_mode == 1) + disconnect_server(server, true, "pause mode"); + else if (idle_test && *cf_server_check_query) { + if (idle > cf_server_check_delay) + change_server_state(server, SV_USED); + } + } +} + +/* + * Check pool size, close conns if too many. Makes pooler + * react faster to the case when admin decreased pool size. + */ +static void check_pool_size(PgPool *pool) +{ + PgSocket *server; + int cur = statlist_count(&pool->active_server_list) + + statlist_count(&pool->idle_server_list) + + statlist_count(&pool->used_server_list) + + statlist_count(&pool->tested_server_list); + + /* cancel pkt may create new srv conn without + * taking pool_size into account + * + * statlist_count(&pool->new_server_list) + */ + + int many = cur - pool->db->pool_size; + + Assert(pool->db->pool_size >= 0); + + while (many > 0) { + server = first_socket(&pool->used_server_list); + if (!server) + server = first_socket(&pool->idle_server_list); + if (!server) + break; + disconnect_server(server, true, "too many servers in pool"); + many--; + } +} + +/* maintain servers in a pool */ +static void pool_server_maint(PgPool *pool) +{ + List *item, *tmp; + usec_t age, now = get_cached_time(); + PgSocket *server; + + /* find and disconnect idle servers */ + check_unused_servers(&pool->used_server_list, now, 0); + check_unused_servers(&pool->tested_server_list, now, 0); + check_unused_servers(&pool->idle_server_list, now, 1); + + /* where query got did not get answer in query_timeout */ + if (cf_query_timeout > 0) { + statlist_for_each_safe(item, &pool->active_server_list, tmp) { + server = container_of(item, PgSocket, head); + Assert(server->state == SV_ACTIVE); + if (server->ready) + continue; + age = now - 
server->link->request_time; + if (age > cf_query_timeout) + disconnect_server(server, true, "statement timeout"); + } + } + + /* find connections that got connect, but could not log in */ + if (cf_server_connect_timeout > 0) { + statlist_for_each_safe(item, &pool->new_server_list, tmp) { + server = container_of(item, PgSocket, head); + Assert(server->state == SV_LOGIN); + + age = now - server->connect_time; + if (age > cf_server_connect_timeout) + disconnect_server(server, true, "connect timeout"); + } + } + + check_pool_size(pool); +} + +/* full-scale maintenenace, done only occasionally */ +static void do_full_maint(int sock, short flags, void *arg) +{ + List *item; + PgPool *pool; + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + if (pool->admin) + continue; + pool_server_maint(pool); + pool_client_maint(pool); + } + + if (cf_shutdown && get_active_server_count() == 0) { + log_info("server connections dropped, exiting"); + exit(0); + } + + loader_users_check(); + + evtimer_add(&full_maint_ev, &full_maint_period); +} + +/* first-time initializtion */ +void janitor_setup(void) +{ + /* launch maintenance */ + evtimer_set(&full_maint_ev, do_full_maint, NULL); + evtimer_add(&full_maint_ev, &full_maint_period); +} + +/* as [pgbouncer] section can be loaded after databases, + theres need for review */ +void config_postprocess(void) +{ + List *item; + PgDatabase *db; + + statlist_for_each(item, &database_list) { + db = container_of(item, PgDatabase, head); + if (db->pool_size < 0) + db->pool_size = cf_default_pool_size; + } +} + diff --git a/src/janitor.h b/src/janitor.h new file mode 100644 index 0000000..2c8ccd1 --- /dev/null +++ b/src/janitor.h @@ -0,0 +1,23 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. 
+ * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +void janitor_setup(void); +void config_postprocess(void); +void resume_all(void); +void per_loop_object_maint(void); + diff --git a/src/list.h b/src/list.h new file mode 100644 index 0000000..8bc2854 --- /dev/null +++ b/src/list.h @@ -0,0 +1,244 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Circular doubly linked list implementation. 
+ * + * Basic idea from . + * + * seemed usable, but overcomplicated. + */ + +#ifndef __LIST_H_ +#define __LIST_H_ + +/* turn on slow checking */ +#if defined(CASSERT) && !defined(LIST_DEBUG) +#define LIST_DEBUG +#endif + +/* give offset of a field inside struct */ +#ifndef offsetof +#define offsetof(type, field) ((unsigned)&(((type *)0)->field)) +#endif + +/* given pointer to field inside struct, return pointer to struct */ +#ifndef container_of +#define container_of(ptr, type, field) ((type *)((char *)(ptr) - offsetof(type, field))) +#endif + +/* list type */ +typedef struct List List; +struct List { + List *next; + List *prev; +}; + +#define LIST(var) List var = { &var, &var } + +/* initialize struct */ +static inline void list_init(List *list) +{ + list->next = list->prev = list; +} + +/* is list empty? */ +static inline bool list_empty(List *list) +{ + return list->next == list; +} + +/* add item to the start of the list */ +static inline List *list_prepend(List *item, List *list) +{ + Assert(list_empty(item)); + + item->next = list->next; + item->prev = list; + list->next->prev = item; + list->next = item; + return item; +} + +/* add item to the end of the list */ +static inline List *list_append(List *item, List *list) +{ + Assert(list_empty(item)); + + item->next = list; + item->prev = list->prev; + list->prev->next = item; + list->prev = item; + return item; +} + +/* remove item from list */ +static inline List *list_del(List *item) +{ + item->prev->next = item->next; + item->next->prev = item->prev; + item->next = item->prev = item; + return item; +} + +/* remove first from list and return */ +static inline List *list_pop(List *list) +{ + if (list_empty(list)) + return NULL; + return list_del(list->next); +} + +/* remove first from list and return */ +static inline List *list_first(List *list) +{ + if (list_empty(list)) + return NULL; + return list->next; +} + +/* remove first elem from list and return with casting */ +#define list_pop_type(list, typ, 
field) \ + (list_empty(list) ? NULL \ + : container_of(list_del((list)->next), typ, field)) + +/* loop over list */ +#define list_for_each(item, list) \ + for ((item) = (list)->next; \ + (item) != (list); \ + (item) = (item)->next) + +/* loop over list and allow removing item */ +#define list_for_each_safe(item, list, tmp) \ + for ((item) = (list)->next, (tmp) = (list)->next->next; \ + (item) != (list); \ + (item) = (tmp), (tmp) = (tmp)->next) + +static inline bool item_in_list(List *item, List *list) +{ + List *tmp; + list_for_each(tmp, list) + if (tmp == item) + return 1; + return 0; +} + + +/* + * wrapper for List that keeps track of number of items + */ + +typedef struct StatList StatList; +struct StatList { + List head; + int cur_count; + int max_count; + const char *name; +}; + +#define STATLIST(var) StatList var = { {&var.head, &var.head}, 0, 0, #var } + +static inline void statlist_reset(StatList *list) +{ + list->max_count = list->cur_count; +} + +static inline void statlist_prepend(List *item, StatList *list) +{ + list_prepend(item, &list->head); + list->cur_count ++; + if (list->cur_count > list->max_count) + list->max_count = list->cur_count; +} + +static inline void statlist_append(List *item, StatList *list) +{ + list_append(item, &list->head); + list->cur_count ++; + if (list->cur_count > list->max_count) + list->max_count = list->cur_count; +} + +static inline void statlist_put_before(List *item, StatList *list, List *pos) +{ + list_append(item, pos); + list->cur_count++; + if (list->cur_count > list->max_count) + list->max_count = list->cur_count; +} + +static inline void statlist_remove(List *item, StatList *list) +{ +#ifdef LIST_DEBUG + /* sanity check */ + if (!item_in_list(item, &list->head)) + fatal("item in wrong list, expected: %s", list->name); +#endif + + list_del(item); + list->cur_count--; + + Assert(list->cur_count >= 0); +} + +static inline void statlist_init(StatList *list, const char *name) +{ + list_init(&list->head); + list->name = 
name; + list->cur_count = list->max_count = 0; +} + +static inline int statlist_count(StatList *list) +{ + Assert(list->cur_count > 0 || list_empty(&list->head)); + return list->cur_count; +} + +static inline int statlist_max(StatList *list) +{ + return list->max_count > list->cur_count + ? list->max_count : list->cur_count; +} + +static inline List *statlist_pop(StatList *list) +{ + List *item = list_pop(&list->head); + + if (item) + list->cur_count--; + + Assert(list->cur_count >= 0); + + return item; +} + +static inline List *statlist_first(StatList *list) +{ + return list_first(&list->head); +} + +static inline bool statlist_empty(StatList *list) +{ + return list_empty(&list->head); +} + +#define statlist_for_each(item, list) list_for_each(item, &((list)->head)) +#define statlist_for_each_safe(item, list, tmp) list_for_each_safe(item, &((list)->head), tmp) + +#endif /* __LIST_H_ */ + diff --git a/src/loader.c b/src/loader.c new file mode 100644 index 0000000..1e31b2c --- /dev/null +++ b/src/loader.c @@ -0,0 +1,529 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Config and pg_auth file reading. 
+ */ + +#include "bouncer.h" + +/* + * ConnString parsing + */ + +/* get key=val pair from connstring */ +static char * getpair(char *p, + char **key_p, int *key_len, + char **val_p, int *val_len) +{ + while (*p && *p == ' ') + p++; + *key_p = p; + while (*p && *p != '=' && *p != ' ') + p++; + *key_len = p - *key_p; + if (*p == '=') + p++; + *val_p = p; + while (*p && *p != ' ') + p++; + *val_len = p - *val_p; + + while (*p && *p == ' ') + p++; + return p; +} + +/* fill PgDatabase from connstr */ +void parse_database(char *name, char *connstr) +{ + char *p, *key, *val; + int klen, vlen; + PktBuf buf; + PgDatabase *db; + int pool_size = -1; + + char *dbname = name; + char *host = NULL; + char *port = "5432"; + char *username = NULL; + char *password = ""; + char *client_encoding = NULL; + char *datestyle = NULL; + + in_addr_t v_addr = INADDR_NONE; + int v_port; + + p = connstr; + while (*p) { + p = getpair(p, &key, &klen, &val, &vlen); + if (*key == 0 || *val == 0 || klen == 0 || vlen == 0) + break; + key[klen] = 0; + val[vlen] = 0; + + if (strcmp("dbname", key) == 0) + dbname = val; + else if (strcmp("host", key) == 0) + host = val; + else if (strcmp("port", key) == 0) + port = val; + else if (strcmp("user", key) == 0) + username = val; + else if (strcmp("password", key) == 0) + password = val; + else if (strcmp("client_encoding", key) == 0) + client_encoding = val; + else if (strcmp("datestyle", key) == 0) + datestyle = val; + else if (strcmp("pool_size", key) == 0) + pool_size = atoi(val); + else { + log_error("skipping database %s because" + " of bad connstring: %s", name, connstr); + return; + } + } + + if (!host) { + if (!cf_unix_socket_dir) { + log_error("skipping database %s because" + " unix socket not configured", name); + return; + } + } else { + v_addr = inet_addr(host); + if (v_addr == INADDR_NONE) { + log_error("skipping database %s because" + " of bad host: %s", name, host); + return; + } + } + v_port = atoi(port); + if (v_port == 0) { + 
log_error("skipping database %s because" + " of bad port: %s", name, port); + return; + } + + db = add_database(name); + if (!db) { + log_error("cannot create database, no mem?"); + return; + } + + if (db->dbname) { + bool changed = false; + if (strcmp(db->dbname, dbname) != 0) + changed = true; + else if (host && db->addr.is_unix) + changed = true; + else if (!host && !db->addr.is_unix) + changed = true; + else if (host && v_addr != db->addr.ip_addr.s_addr) + changed = true; + else if (v_port != db->addr.port) + changed = true; + else if (username && !db->forced_user) + changed = true; + else if (username && strcmp(username, db->forced_user->name)) + changed = true; + else if (!username && db->forced_user) + changed = true; + + if (changed) + tag_database_dirty(db); + } + + /* if pool_size < 0 it will be set later */ + db->pool_size = pool_size; + db->addr.port = v_port; + db->addr.ip_addr.s_addr = v_addr; + db->addr.is_unix = host ? 0 : 1; + + pktbuf_static(&buf, db->startup_params, sizeof(db->startup_params)); + + pktbuf_put_string(&buf, "database"); + db->dbname = (char *)db->startup_params + pktbuf_written(&buf); + pktbuf_put_string(&buf, dbname); + + if (client_encoding) { + pktbuf_put_string(&buf, "client_encoding"); + pktbuf_put_string(&buf, client_encoding); + } + + if (datestyle) { + pktbuf_put_string(&buf, "datestyle"); + pktbuf_put_string(&buf, datestyle); + } + + db->startup_params_len = pktbuf_written(&buf); + + /* if user is forces, create fake object for it */ + if (username != NULL) { + if (!force_user(db, username, password)) + log_warning("db setup failed, trying to continue"); + } else if (db->forced_user) + log_warning("losing forced user not supported," + " keeping old setting"); +} + +/* + * User file parsing + */ + +/* find next " in string, skipping escaped ones */ +static char *find_quote(char *p) +{ +loop: + while (*p && *p != '\\' && *p != '"') p++; + if (*p == '\\' && p[1]) { + p += 2; + goto loop; + } + + return p; +} + +/* string is 
unquoted while copying */
+/*
+ * Copy src into dst, dropping the backslash in front of escaped characters.
+ *
+ * At most len - 1 bytes are written, followed by a terminating NUL.
+ * A backslash makes the following character literal, so an escaped
+ * backslash yields a single backslash.
+ */
+static void copy_quoted(char *dst, const char *src, int len)
+{
+	char *end;
+
+	/* no room even for the terminator, leave dst alone */
+	if (len <= 0)
+		return;
+
+	end = dst + len - 1;
+	while (*src && dst < end) {
+		if (*src == '\\') {
+			/* skip the escape char, keep whatever follows it */
+			src++;
+			if (!*src)
+				break;
+		}
+		*dst++ = *src++;
+	}
+	*dst = 0;
+}
+
+/*
+ * Unescape one username/password pair and hand it to add_user().
+ * A failed add_user() is only logged.
+ */
+static void unquote_add_user(const char *username, const char *password)
+{
+	char real_user[MAX_USERNAME];
+	char real_passwd[MAX_PASSWORD];
+	PgUser *user;
+
+	copy_quoted(real_user, username, sizeof(real_user));
+	copy_quoted(real_passwd, password, sizeof(real_passwd));
+
+	user = add_user(real_user, real_passwd);
+	if (!user)
+		log_warning("cannot create user, no mem");
+}
+
+/*
+ * Tell whether the file fn looks the same as on the previous call.
+ *
+ * Returns true when the stat() info matches the remembered one.
+ * Otherwise remembers the new info and returns false.  Also returns
+ * false when stat() fails.  Calling with fn == NULL forgets the
+ * remembered info.
+ */
+static bool auth_loaded(const char *fn)
+{
+	static struct stat cache;
+	struct stat cur;
+
+	/* hack for resetting */
+	if (fn == NULL) {
+		memset(&cache, 0, sizeof(cache));
+		return false;
+	}
+
+	if (stat(fn, &cur) < 0)
+		return false;
+
+	/* same file, same owner, same mode, same mtime, same size */
+	if (cache.st_dev == cur.st_dev
+	    && cache.st_ino == cur.st_ino
+	    && cache.st_mode == cur.st_mode
+	    && cache.st_uid == cur.st_uid
+	    && cache.st_gid == cur.st_gid
+	    && cache.st_mtime == cur.st_mtime
+	    && cache.st_size == cur.st_size)
+		return true;
+	cache = cur;
+	return false;
+}
+
+/*
+ * Reload cf_auth_file if its stat() info changed since the last check.
+ * Returns true if nothing changed, otherwise the result of load_auth_file().
+ */
+bool loader_users_check(void)
+{
+	if (auth_loaded(cf_auth_file))
+		return true;
+
+	return load_auth_file(cf_auth_file);
+}
+
+/*
+ * load list of users from pg_auth/pg_psw file
+ *
+ * Each non-empty line must start with "username" followed by "password",
+ * both in double quotes; the rest of the line is ignored.
+ *
+ * Returns false only if the file could not be loaded.  A malformed line
+ * is logged and stops parsing, but users added so far are kept and
+ * true is still returned.
+ */
+bool load_auth_file(const char *fn)
+{
+	char *user, *password, *buf, *p;
+
+	buf = load_file(fn);
+	if (buf == NULL) {
+		/* reset file info */
+		auth_loaded(NULL);
+		return false;
+	}
+
+	p = buf;
+	while (*p) {
+		/* skip whitespace and empty lines */
+		while (*p && isspace((unsigned char)*p)) p++;
+		if (!*p)
+			break;
+
+		/* start of line */
+		if (*p != '"') {
+			log_error("broken auth file");
+			break;
+		}
+		user = ++p;
+		p = find_quote(p);
+		if (*p != '"') {
+			log_error("broken auth file");
+			break;
+		}
+		if (p - user >= MAX_USERNAME) {
+			log_error("too long username");
+			break;
+		}
+		*p++ = 0; /* tag username end */
+
+		/* get password */
+		p = find_quote(p);
+		if (*p != '"') {
+			log_error("broken auth file");
+			break;
+		}
+		password = ++p;
+		p = find_quote(p);
+		if (*p != '"') {
+			log_error("broken auth file");
+			break;
+		}
+		if (p - password >= MAX_PASSWORD) {
+			log_error("too long password");
+			break;
+		}
+		*p++ = 0; /* tag password end */
+
+		/* send them away */
+		unquote_add_user(user, password);
+
+		/* skip rest of the line */
+		while (*p && *p != '\n') p++;
+	}
+	free(buf);
+
+	create_auth_cache();
+
+	return true;
+}
+
+/*
+ * INI file parser
+ */
+
+/* Store val into the int at elem->dst; val must start with a digit. */
+bool cf_set_int(ConfElem *elem, const char *val, PgSocket *console)
+{
+	int *int_p = elem->dst;
+	if (*val < '0' || *val > '9') {
+		admin_error(console, "bad value: %s", val);
+		return false;
+	}
+	*int_p = atoi(val);
+	return true;
+}
+
+/* Format the int at elem->dst; result lives in a static buffer. */
+const char *cf_get_int(ConfElem *elem)
+{
+	static char numbuf[32];
+	int val;
+
+	val = *(int *)elem->dst;
+	sprintf(numbuf, "%d", val);
+	return numbuf;
+}
+
+/* Store val, scaled by USEC, into the usec_t at elem->dst. */
+bool cf_set_time(ConfElem *elem, const char *val, PgSocket *console)
+{
+	usec_t *time_p = elem->dst;
+	if (*val < '0' || *val > '9') {
+		admin_error(console, "bad value: %s", val);
+		return false;
+	}
+	*time_p = USEC * (usec_t)atoi(val);
+	return true;
+}
+
+/* Format the usec_t at elem->dst divided by USEC; static buffer. */
+const char *cf_get_time(ConfElem *elem)
+{
+	static char numbuf[32];
+	usec_t val;
+
+	val = *(usec_t *)elem->dst;
+	sprintf(numbuf, "%d", (int)(val / USEC));
+	return numbuf;
+}
+
+/*
+ * Replace the string at elem->dst with a copy of val.
+ *
+ * Returns false, leaving the old value untouched, if the copy
+ * cannot be allocated.
+ */
+bool cf_set_str(ConfElem *elem, const char *val, PgSocket *console)
+{
+	char **str_p = elem->dst;
+	char *tmp;
+
+	/* dont touch if not changed */
+	if (*str_p && strcmp(*str_p, val) == 0)
+		return true;
+
+	/* get the new copy first, so failure keeps the old value valid */
+	tmp = strdup(val);
+	if (!tmp)
+		return false;
+
+	/* if dynamically allocated, free it */
+	if (elem->allocated)
+		free(*str_p);
+
+	*str_p = tmp;
+	elem->allocated = true;
+	return true;
+}
+
+/* Return the string stored at elem->dst, may be NULL. */
+const char * cf_get_str(ConfElem *elem)
+{
+	return *(char **)elem->dst;
+}
+
+/*
+ * Find key (case-insensitive) in the NULL-name-terminated elem_list
+ * and pass val to its setter.
+ *
+ * With reload set, elements not marked reloadable are refused; the
+ * refusal is reported only when there is a console.
+ * Returns the setter result, or false for refused/unknown keys.
+ */
+bool set_config_param(ConfElem *elem_list,
+		      const char *key, const char *val,
+		      bool reload, PgSocket *console)
+{
+	ConfElem *desc;
+
+	for (desc = elem_list; desc->name; desc++) {
+		if (strcasecmp(key, desc->name))
+			continue;
+
+		/* if reload not allowed, skip it */
+		if (reload && !desc->reloadable) {
+			if (console)
+				admin_error(console,
+					"%s cannot be changed online", key);
+			return false;
+		}
+
+		/* got config, parse it */
+		return desc->io.fn_set(desc, val, console);
+	}
+	admin_error(console, "unknown config parameter: %s", key);
+	return false;
+}
+
+/*
+ * Route one key/value pair to its section: the data callback if the
+ * section has one, otherwise the section's parameter list.
+ * Pairs outside a known section are dropped.
+ */
+static void map_config(ConfSection *sect, char *key, char *val, bool reload)
+{
+	if (sect == NULL)
+		return;
+
+	if (sect->data_fn)
+		sect->data_fn(key, val);
+	else
+		set_config_param(sect->elem_list, key, val, reload, NULL);
+}
+
+/* Text form of a config element, produced by its getter. */
+const char *conf_to_text(ConfElem *elem)
+{
+	return elem->io.fn_get(elem);
+}
+
+/* Case-insensitive section lookup; warns and returns NULL if missing. */
+static ConfSection *find_section(ConfSection *sect, const char *name)
+{
+	for (; sect->name; sect++)
+		if (strcasecmp(sect->name, name) == 0)
+			return sect;
+	log_warning("unknown section in config: %s", name);
+	return NULL;
+}
+
+/*
+ * Parse the ini file fn and feed its key/value pairs to sect_list.
+ *
+ * Understands '#' and ';' comment lines, "[section]" headers and
+ * "key = value" lines, where key consists of alphanumerics and '_'.
+ * A line that is none of those stops the parsing.
+ *
+ * If the file cannot be loaded, the process exits with status 1
+ * unless reload is set, in which case the function just returns.
+ */
+void iniparser(const char *fn, ConfSection *sect_list, bool reload)
+{
+	char *buf;
+	char *p, *key, *val;
+	int klen, vlen;
+	ConfSection *cur_section = NULL;
+
+	buf = load_file(fn);
+	if (buf == NULL) {
+		if (!reload)
+			exit(1);
+		else
+			return;
+	}
+
+	p = buf;
+	while (*p) {
+		/* space at the start of line - including empty lines */
+		while (*p && isspace((unsigned char)*p)) p++;
+
+		/* skip comment lines */
+		if (*p == '#' || *p == ';') {
+			while (*p && *p != '\n') p++;
+			continue;
+		}
+		/* got new section */
+		if (*p == '[') {
+			key = ++p;
+			while (*p && *p != ']' && *p != '\n') p++;
+			if (*p != ']') {
+				log_warning("bad section header");
+				cur_section = NULL;
+				continue;
+			}
+			*p++ = 0;
+
+			cur_section = find_section(sect_list, key);
+			continue;
+		}
+
+		/* done? */
+		if (*p == 0) break;
+
+		/* read key val */
+		key = p;
+		while (*p && (isalnum((unsigned char)*p) || *p == '_')) p++;
+		klen = p - key;
+
+		/* expect '=', skip it */
+		while (*p && (*p == ' ' || *p == '\t')) p++;
+		if (*p != '=') {
+			log_error("syntax error in config, stopping loading");
+			break;
+		} else
+			p++;
+		while (*p && (*p == ' ' || *p == '\t')) p++;
+
+		/* now read value */
+		val = p;
+		while (*p && (*p != '\n'))
+			p++;
+		vlen = p - val;
+		/* eat space at end */
+		while (vlen > 0 && isspace((unsigned char)val[vlen - 1]))
+			vlen--;
+
+		/* skip junk */
+		while (*p && isspace((unsigned char)*p)) p++;
+
+		/* our buf is r/w, so take it easy */
+		key[klen] = 0;
+		val[vlen] = 0;
+		map_config(cur_section, key, val, reload);
+	}
+
+	free(buf);
+}
+
diff --git a/src/loader.h b/src/loader.h
new file mode 100644
index 0000000..957d883
--- /dev/null
+++ b/src/loader.h
@@ -0,0 +1,74 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* configuration parsing */
+
+/* getter/setter pairs used to initialize the ConfElem.io member */
+#define CF_INT	{cf_get_int, cf_set_int}
+#define CF_STR	{cf_get_str, cf_set_str}
+#define CF_TIME	{cf_get_time, cf_set_time}
+
+#define CF_SECT_VARS	1 /* section contains pre-defined key-var pairs */
+#define CF_SECT_DATA	2 /* key-val pairs are data */
+
+typedef struct ConfElem ConfElem;
+
+/* callback for CF_SECT_DATA loading */
+typedef void (*conf_data_callback_fn)(char *key, char *value);
+
+/* getter returns the text form of the value, setter parses it from text */
+typedef const char * (*conf_var_get_fn)(ConfElem *elem);
+typedef bool (*conf_var_set_fn)(ConfElem *elem, const char *value, PgSocket *console);
+
+typedef struct {
+	conf_var_get_fn fn_get;
+	conf_var_set_fn fn_set;
+} ConfAccess;
+
+/* one config parameter; lists of these end with an entry whose name is NULL */
+struct ConfElem {
+	const char *name;	/* key, matched case-insensitively by set_config_param() */
+	bool reloadable;	/* if false, set_config_param() refuses it when reload is set */
+	ConfAccess io;		/* accessors for the value */
+	void *dst;		/* storage used by cf_* accessors: int, usec_t or char pointer */
+	bool allocated;		/* set by cf_set_str() once *dst holds a strdup()ed string */
+};
+
+/* one config file section; lists of these end with an entry whose name is NULL */
+typedef struct ConfSection {
+	const char *name;	/* section name, matched case-insensitively */
+	ConfElem *elem_list;	/* parameters, used when data_fn is NULL */
+	conf_data_callback_fn data_fn;	/* if set, receives every key/value of the section */
+} ConfSection;
+
+void iniparser(const char *fn, ConfSection *sect_list, bool reload);
+
+const char * cf_get_int(ConfElem *elem);
+bool cf_set_int(ConfElem *elem, const char *value, PgSocket *console);
+
+const char * cf_get_time(ConfElem *elem);
+bool cf_set_time(ConfElem *elem, const char *value, PgSocket *console);
+
+const char *cf_get_str(ConfElem *elem);
+bool cf_set_str(ConfElem *elem, const char *value, PgSocket *console);
+
+const char *conf_to_text(ConfElem *elem);
+bool set_config_param(ConfElem *elem_list, const char *key, const char *val, bool reload, PgSocket *console);
+
+/* connstring parsing */
+void parse_database(char *name, char *connstr);
+
+/* user file parsing */
+bool load_auth_file(const char *fn);
+bool loader_users_check(void);
+
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..7841c53
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,459 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Launcher for all the rest.
+ */
+
+#include "bouncer.h"
+
+#include <signal.h>	/* sigprocmask(), SIG* constants */
+#include <getopt.h>	/* getopt(), optind */
+
+static bool set_mode(ConfElem *elem, const char *val, PgSocket *console);
+static const char *get_mode(ConfElem *elem);
+static bool set_auth(ConfElem *elem, const char *val, PgSocket *console);
+static const char *get_auth(ConfElem *elem);
+
+static const char *usage_str =
+"usage: pgbouncer [-d] [-v] [-h|-V] config.ini\n";
+
+/* Print the usage text and exit with status err. */
+static void usage(int err)
+{
+	/* never hand a non-literal to printf as the format */
+	printf("%s", usage_str);
+	exit(err);
+}
+
+/*
+ * configuration storage
+ *
+ * String settings start out pointing at literals; cf_set_str()
+ * replaces them with allocated copies.
+ */
+
+int cf_verbose = 0;
+int cf_daemon = 0;
+int cf_pause_mode = 0;
+int cf_shutdown = 0;
+int cf_reboot = 0;
+static char *cf_config_file;
+
+char *cf_listen_addr = NULL;
+int cf_listen_port = 6000;
+char *cf_unix_socket_dir = "/tmp";
+
+int cf_pool_mode = POOL_SESSION;
+
+/* sbuf config */
+int cf_sbuf_len = 2048;
+int cf_tcp_socket_buffer = 0;
+#ifdef TCP_DEFER_ACCEPT
+int cf_tcp_defer_accept = 45;
+#else
+int cf_tcp_defer_accept = 0;
+#endif
+int cf_tcp_keepalive = 0;
+int cf_tcp_keepcnt = 0;
+int cf_tcp_keepidle = 0;
+int cf_tcp_keepintvl = 0;
+
+int cf_auth_type = AUTH_MD5;
+char *cf_auth_file = "unconfigured_file";
+
+int cf_max_client_conn = 20;
+int cf_default_pool_size = 10;
+
+char *cf_server_check_query = "select 1";
+usec_t cf_server_check_delay = 30 * USEC;
+
+usec_t cf_server_lifetime = 60*60*USEC;
+usec_t cf_server_idle_timeout = 10*60*USEC;
+usec_t cf_server_connect_timeout = 15*USEC;
+usec_t cf_server_login_retry = 15*USEC;
+usec_t cf_query_timeout = 0*USEC;
+usec_t cf_client_idle_timeout = 0*USEC;
+
+char *cf_logfile = NULL;
+char *cf_pidfile = NULL;
+static char *cf_jobname = NULL;
+
+char *cf_admin_users = "";
+char *cf_stats_users = "";
+int cf_stats_period = 60;
+
+
+/*
+ * config file description
+ *
+ * Columns: name, reloadable, accessors, storage.
+ * auth_type and pool_mode use their own accessors and need no storage.
+ */
+ConfElem bouncer_params[] = {
+{"job_name",		true, CF_STR, &cf_jobname},
+{"conffile",		true, CF_STR, &cf_config_file},
+{"logfile",		true, CF_STR, &cf_logfile},
+{"pidfile",		false, CF_STR, &cf_pidfile},
+{"listen_addr",		false, CF_STR, &cf_listen_addr},
+{"listen_port",		false, CF_INT, &cf_listen_port},
+{"unix_socket_dir",	false, CF_STR, &cf_unix_socket_dir},
+{"auth_type",		true, {get_auth, set_auth}},
+{"auth_file",		true, CF_STR, &cf_auth_file},
+{"pool_mode",		true, {get_mode, set_mode}},
+{"max_client_conn",	true, CF_INT, &cf_max_client_conn},
+{"default_pool_size",	true, CF_INT, &cf_default_pool_size},
+
+{"server_check_query",	true, CF_STR, &cf_server_check_query},
+{"server_check_delay",	true, CF_TIME, &cf_server_check_delay},
+{"query_timeout",	true, CF_TIME, &cf_query_timeout},
+{"client_idle_timeout",	true, CF_TIME, &cf_client_idle_timeout},
+{"server_lifetime",	true, CF_TIME, &cf_server_lifetime},
+{"server_idle_timeout",	true, CF_TIME, &cf_server_idle_timeout},
+{"server_connect_timeout",true, CF_TIME, &cf_server_connect_timeout},
+{"server_login_retry",	true, CF_TIME, &cf_server_login_retry},
+
+{"pkt_buf",		false, CF_INT, &cf_sbuf_len},
+{"tcp_defer_accept",	false, CF_INT, &cf_tcp_defer_accept},
+{"tcp_socket_buffer",	true, CF_INT, &cf_tcp_socket_buffer},
+{"tcp_keepalive",	true, CF_INT, &cf_tcp_keepalive},
+{"tcp_keepcnt",		true, CF_INT, &cf_tcp_keepcnt},
+{"tcp_keepidle",	true, CF_INT, &cf_tcp_keepidle},
+{"tcp_keepintvl",	true, CF_INT, &cf_tcp_keepintvl},
+{"verbose",		true, CF_INT, &cf_verbose},
+{"admin_users",		true, CF_STR, &cf_admin_users},
+{"stats_users",		true, CF_STR, &cf_stats_users},
+{"stats_period",	true, CF_INT, &cf_stats_period},
+{NULL},
+};
+
+/* [pgbouncer] holds the parameters above, [databases] goes to parse_database() */
+static ConfSection bouncer_config [] = {
+{"pgbouncer",	bouncer_params, NULL},
+{"databases",	NULL, parse_database},
+{NULL}
+};
+
+/* Text name of the current cf_pool_mode. */
+static const char *get_mode(ConfElem *elem)
+{
+	switch (cf_pool_mode) {
+	case POOL_STMT: return "statement";
+	case POOL_TX: return "transaction";
+	case POOL_SESSION: return "session";
+	default:
+		fatal("borken mode? should not happen");
+		return NULL;
+	}
+}
+
+/* Set cf_pool_mode from its text name (case-insensitive). */
+static bool set_mode(ConfElem *elem, const char *val, PgSocket *console)
+{
+	if (strcasecmp(val, "session") == 0)
+		cf_pool_mode = POOL_SESSION;
+	else if (strcasecmp(val, "transaction") == 0)
+		cf_pool_mode = POOL_TX;
+	else if (strcasecmp(val, "statement") == 0)
+		cf_pool_mode = POOL_STMT;
+	else {
+		admin_error(console, "bad mode: %s", val);
+		return false;
+	}
+	return true;
+}
+
+/* Text name of the current cf_auth_type. */
+static const char *get_auth(ConfElem *elem)
+{
+	switch (cf_auth_type) {
+	case AUTH_ANY: return "any";
+	case AUTH_TRUST: return "trust";
+	case AUTH_PLAIN: return "plain";
+	case AUTH_CRYPT: return "crypt";
+	case AUTH_MD5: return "md5";
+	default:
+		fatal("borken auth? should not happen");
+		return NULL;
+	}
+}
+
+/* Set cf_auth_type from its text name (case-insensitive). */
+static bool set_auth(ConfElem *elem, const char *val, PgSocket *console)
+{
+	if (strcasecmp(val, "any") == 0)
+		cf_auth_type = AUTH_ANY;
+	else if (strcasecmp(val, "trust") == 0)
+		cf_auth_type = AUTH_TRUST;
+	else if (strcasecmp(val, "plain") == 0)
+		cf_auth_type = AUTH_PLAIN;
+	else if (strcasecmp(val, "crypt") == 0)
+		cf_auth_type = AUTH_CRYPT;
+	else if (strcasecmp(val, "md5") == 0)
+		cf_auth_type = AUTH_MD5;
+	else {
+		admin_error(console, "bad auth type: %s", val);
+		return false;
+	}
+	return true;
+}
+
+/* config loading, tries to be tolerant to errors */
+void load_config(bool reload)
+{
+	/* actual loading */
+	iniparser(cf_config_file, bouncer_config, reload);
+
+	/* load users if needed */
+	if (cf_auth_type >= AUTH_TRUST)
+		load_auth_file(cf_auth_file);
+
+	/* reset pool_size */
+	config_postprocess();
+}
+
+/*
+ * signal handling.
+ *
+ * handle_* functions are not actual signal handlers but called from
+ * event_loop() so they have no restrictions what they can do.
+ */
+static struct event ev_sigterm;
+static struct event ev_sigint;
+static struct event ev_sigusr1;
+static struct event ev_sigusr2;
+static struct event ev_sighup;
+
+/* SIGTERM: leave immediately with exit status 1. */
+static void handle_sigterm(int sock, short flags, void *arg)
+{
+	log_info("Got SIGTERM, fast exit");
+	/* pidfile cleanup happens via atexit() */
+	exit(1);
+}
+
+/* SIGINT: enter pause mode and flag shutdown. */
+static void handle_sigint(int sock, short flags, void *arg)
+{
+	log_info("Got SIGINT, shutting down");
+	cf_pause_mode = 1;
+	cf_shutdown = 1;
+}
+
+/* SIGUSR1: enter pause mode unless already paused or suspended. */
+static void handle_sigusr1(int sock, short flags, void *arg)
+{
+	if (cf_pause_mode == 0) {
+		log_info("Got SIGUSR1, pausing all activity");
+		cf_pause_mode = 1;
+	} else {
+		log_info("Got SIGUSR1, but already paused/suspended");
+	}
+}
+
+/* SIGUSR2: leave pause (1) or suspend (2) mode. */
+static void handle_sigusr2(int sock, short flags, void *arg)
+{
+	switch (cf_pause_mode) {
+	case 2:
+		log_info("Got SIGUSR2, continuing from SUSPEND");
+		resume_all();
+		cf_pause_mode = 0;
+		break;
+	case 1:
+		log_info("Got SIGUSR2, continuing from PAUSE");
+		cf_pause_mode = 0;
+		break;
+	case 0:
+		log_info("Got SIGUSR2, but not paused/suspended");
+	}
+}
+
+/* SIGHUP: re-read the config in reload mode. */
+static void handle_sighup(int sock, short flags, void *arg)
+{
+	log_info("Got SIGHUP re-reading config");
+	load_config(true);
+}
+
+/* Block SIGPIPE and register the handle_* callbacks as signal events. */
+static void signal_setup(void)
+{
+	int err;
+	sigset_t set;
+
+	/* block SIGPIPE */
+	sigemptyset(&set);
+	sigaddset(&set, SIGPIPE);
+	err = sigprocmask(SIG_BLOCK, &set, NULL);
+	if (err < 0)
+		fatal_perror("sigprocmask");
+
+	/* install handlers */
+	signal_set(&ev_sigterm, SIGTERM, handle_sigterm, NULL);
+	signal_add(&ev_sigterm, NULL);
+	signal_set(&ev_sigint, SIGINT, handle_sigint, NULL);
+	signal_add(&ev_sigint, NULL);
+	signal_set(&ev_sigusr1, SIGUSR1, handle_sigusr1, NULL);
+	signal_add(&ev_sigusr1, NULL);
+	signal_set(&ev_sigusr2, SIGUSR2, handle_sigusr2, NULL);
+	signal_add(&ev_sigusr2, NULL);
+	signal_set(&ev_sighup, SIGHUP, handle_sighup, NULL);
+	signal_add(&ev_sighup, NULL);
+}
+
+/*
+ * daemon mode
+ *
+ * Requires a configured pidfile.  Closes descriptors from 3 up,
+ * points stdin/stdout/stderr at /dev/null, then does the usual
+ * fork - setsid - fork sequence.
+ */
+static void go_daemon(void)
+{
+	int pid, fd;
+
+	if (!cf_pidfile)
+		fatal("daemon needs pidfile configured");
+
+	/* just in case close all files */
+	for (fd = 3; fd < OPEN_MAX; fd++)
+		close(fd);
+
+	/* send stdin, stdout, stderr to /dev/null */
+	fd = open("/dev/null", O_RDWR);
+	if (fd < 0)
+		fatal_perror("/dev/null");
+	dup2(fd, 0);
+	dup2(fd, 1);
+	dup2(fd, 2);
+	if (fd > 2)
+		close(fd);
+
+	/* fork new process */
+	pid = fork();
+	if (pid < 0)
+		fatal_perror("fork");
+	if (pid > 0)
+		_exit(0);
+
+	/* create new session */
+	pid = setsid();
+	if (pid < 0)
+		fatal_perror("setsid");
+
+	/* fork again to avoid being session leader */
+	pid = fork();
+	if (pid < 0)
+		fatal_perror("fork");
+	if (pid > 0)
+		_exit(0);
+
+}
+
+/*
+ * check for an existing pidfile. if it exists, quit with error.
+ */
+static void check_pidfile(void)
+{
+	struct stat st;
+	if (!cf_pidfile)
+		return;
+	if (stat(cf_pidfile, &st) >= 0)
+		fatal("pidfile exists, another instance running?");
+}
+
+/* atexit() hook registered by write_pidfile(). */
+static void remove_pidfile(void)
+{
+	if (!cf_pidfile)
+		return;
+	unlink(cf_pidfile);
+}
+
+/*
+ * write pidfile. O_EXCL makes this fail if the file already exists.
+ */
+static void write_pidfile(void)
+{
+	char buf[64];
+	pid_t pid;
+	int res, fd;
+
+	if (!cf_pidfile)
+		return;
+
+	pid = getpid();
+	sprintf(buf, "%u", (unsigned)pid);
+
+	fd = open(cf_pidfile, O_WRONLY | O_CREAT | O_EXCL, 0644);
+	if (fd < 0)
+		fatal_perror(cf_pidfile);
+	res = safe_write(fd, buf, strlen(buf));
+	if (res < 0)
+		fatal_perror(cf_pidfile);
+	safe_close(fd);
+
+	/* only remove when we have it actually written */
+	atexit(remove_pidfile);
+}
+
+/*
+ * Pidfile check, optional daemonizing, pidfile write.
+ * With cf_reboot set both pidfile steps are skipped here; main()
+ * writes the pidfile after the takeover has finished.
+ */
+static void daemon_setup(void)
+{
+	if (!cf_reboot)
+		check_pidfile();
+	if (cf_daemon)
+		go_daemon();
+	if (!cf_reboot)
+		write_pidfile();
+}
+
+/* One event loop iteration followed by object maintenance. */
+static void main_loop_once(void)
+{
+	reset_time_cache();
+	event_loop(EVLOOP_ONCE);
+	per_loop_object_maint();
+}
+
+/* boot everything */
+int main(int argc, char *argv[])
+{
+	int c;
+
+	/* parse cmdline, getopt() signals the end with -1 */
+	while ((c = getopt(argc, argv, "vhdVR")) != -1) {
+		switch (c) {
+		case 'R':
+			cf_reboot = 1;
+			break;
+		case 'v':
+			cf_verbose++;
+			break;
+		case 'V':
+			printf("%s version %s\n", PACKAGE_NAME, PACKAGE_VERSION);
+			return 0;
+		case 'd':
+			cf_daemon = 1;
+			break;
+		case 'h':
+		default:
+			usage(1);
+		}
+	}
+	/* exactly one non-option argument: the config file */
+	if (optind + 1 != argc)
+		usage(1);
+	cf_config_file = argv[optind];
+	load_config(false);
+
+	/* init random */
+	srandom(time(NULL) ^ getpid());
+
+	/* initialize subsystems, order important */
+	daemon_setup();
+	event_init();
+	signal_setup();
+	janitor_setup();
+	stats_setup();
+	admin_setup();
+
+	if (cf_reboot) {
+		/* loop until cf_reboot gets cleared, then claim the pidfile */
+		takeover_init();
+		while (cf_reboot)
+			main_loop_once();
+		write_pidfile();
+	} else
+		pooler_setup();
+
+	/* main loop */
+	while (1)
+		main_loop_once();
+}
+
diff --git a/src/mbuf.h b/src/mbuf.h
new file mode 100644
index 0000000..9bb38bd
--- /dev/null
+++ b/src/mbuf.h
@@ -0,0 +1,104 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Safe and easy access to fixed memory buffer
+ *
+ * All multi-byte integers are read most significant byte first.
+ * Reading past the end of the buffer is fatal(), except in
+ * mbuf_get_string() which reports it by returning NULL.
+ */
+
+typedef struct MBuf MBuf;
+struct MBuf {
+	const uint8 *data;	/* start of buffer */
+	const uint8 *end;	/* one past the last byte */
+	const uint8 *pos;	/* next byte to read */
+};
+
+/* Point buf at len bytes starting at ptr; negative len is fatal. */
+static inline void mbuf_init(MBuf *buf, const uint8 *ptr, int len)
+{
+	if (len < 0)
+		fatal("fuckup");
+	buf->data = buf->pos = ptr;
+	buf->end = ptr + len;
+}
+
+/* Read one byte. */
+static inline uint8 mbuf_get_char(MBuf *buf)
+{
+	if (buf->pos + 1 > buf->end)
+		fatal("buffer overflow");
+	return *buf->pos++;
+}
+
+/* Read a 2-byte unsigned integer. */
+static inline unsigned mbuf_get_uint16(MBuf *buf)
+{
+	unsigned val;
+	if (buf->pos + 2 > buf->end)
+		fatal("buffer overflow");
+	val = *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	return val;
+}
+
+/* Read a 4-byte unsigned integer. */
+static inline unsigned mbuf_get_uint32(MBuf *buf)
+{
+	unsigned val;
+	if (buf->pos + 4 > buf->end)
+		fatal("buffer overflow");
+	val = *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	return val;
+}
+
+/*
+ * Read an 8-byte unsigned integer as two 4-byte halves.
+ * The return type must be uint64, otherwise the upper half is lost.
+ */
+static inline uint64 mbuf_get_uint64(MBuf *buf)
+{
+	uint64 i1, i2;
+	i1 = mbuf_get_uint32(buf);
+	i2 = mbuf_get_uint32(buf);
+	return (i1 << 32) | i2;
+}
+
+/* Return a pointer to the next len bytes and skip over them. */
+static inline const uint8 * mbuf_get_bytes(MBuf *buf, unsigned len)
+{
+	const uint8 *res = buf->pos;
+	if (len > buf->end - buf->pos)
+		fatal("buffer overflow");
+	buf->pos += len;
+	return res;
+}
+
+/*
+ * Return the NUL-terminated string at the read position and skip
+ * over it, terminator included.  Returns NULL if no terminator is
+ * found before the end; the read position is then left at the end.
+ */
+static inline const char * mbuf_get_string(MBuf *buf)
+{
+	const char *res = (const char *)buf->pos;
+	while (buf->pos < buf->end && *buf->pos)
+		buf->pos++;
+	if (buf->pos == buf->end)
+		return NULL;
+	buf->pos++;
+	return res;
+}
+
+/* Number of unread bytes. */
+static inline unsigned mbuf_avail(MBuf *buf)
+{
+	return buf->end - buf->pos;
+}
+
+/* Total buffer size. */
+static inline unsigned mbuf_size(MBuf *buf)
+{
+	return buf->end - buf->data;
+}
+
diff --git a/src/md5.c b/src/md5.c
new file mode 100644
index 0000000..c52e340
--- /dev/null
+++ b/src/md5.c
@@ -0,0 +1,407 @@
+/* $KAME: md5.c,v 1.3 2000/02/22 14:01:17 itojun Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $PostgreSQL: pgsql/contrib/pgcrypto/md5.c,v 1.13 2005/07/11 15:07:59 tgl Exp $
+ */
+
+#include "bouncer.h"
+
+#include <sys/param.h>	/* expected to provide BYTE_ORDER, checked below */
+
+#include "md5.h"
+
+/* sanity check */
+#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN && BYTE_ORDER != BIG_ENDIAN)
+#error Define BYTE_ORDER to be equal to either LITTLE_ENDIAN or BIG_ENDIAN
+#endif
+
+/* rotate 32-bit value X left by s bits */
+#define SHIFT(X, s) (((X) << (s)) | ((X) >> (32 - (s))))
+
+/* per-round mixing functions, every argument use is parenthesized */
+#define F(X, Y, Z) (((X) & (Y)) | ((~(X)) & (Z)))
+#define G(X, Y, Z) (((X) & (Z)) | ((Y) & (~(Z))))
+#define H(X, Y, Z) ((X) ^ (Y) ^ (Z))
+#define I(X, Y, Z) ((Y) ^ ((X) | (~(Z))))
+
+/*
+ * One step of each round: a = b + rotl(a + fn(b, c, d) + X[k] + T[i], s).
+ * X and T must be in scope at the point of use.
+ */
+#define ROUND1(a, b, c, d, k, s, i) \
+do { \
+	(a) = (a) + F((b), (c), (d)) + X[(k)] + T[(i)]; \
+	(a) = SHIFT((a), (s)); \
+	(a) = (b) + (a); \
+} while (0)
+
+#define ROUND2(a, b, c, d, k, s, i) \
+do { \
+	(a) = (a) + G((b), (c), (d)) + X[(k)] + T[(i)]; \
+	(a) = SHIFT((a), (s)); \
+	(a) = (b) + (a); \
+} while (0)
+
+#define ROUND3(a, b, c, d, k, s, i) \
+do { \
+	(a) = (a) + H((b), (c), (d)) + X[(k)] + T[(i)]; \
+	(a) = SHIFT((a), (s)); \
+	(a) = (b) + (a); \
+} while (0)
+
+#define ROUND4(a, b, c, d, k, s, i) \
+do { \
+	(a) = (a) + I((b), (c), (d)) + X[(k)] + T[(i)]; \
+	(a) = SHIFT((a), (s)); \
+	(a) = (b) + (a); \
+} while (0)
+
+/* rotation amounts, four per round */
+#define Sa	 7
+#define Sb	12
+#define Sc	17
+#define Sd	22
+
+#define Se	 5
+#define Sf	 9
+#define Sg	14
+#define Sh	20
+
+#define Si	 4
+#define Sj	11
+#define Sk	16
+#define Sl	23
+
+#define Sm	 6
+#define Sn	10
+#define So	15
+#define Sp	21
+
+/* initial values of the four state words */
+#define MD5_A0	0x67452301
+#define MD5_B0	0xefcdab89
+#define MD5_C0	0x98badcfe
+#define MD5_D0	0x10325476
+
+/* Integer part of 4294967296 times abs(sin(i)), where i is in radians.
*/ +static const uint32 T[65] = { + 0, + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +}; + +static const uint8 md5_paddat[MD5_BUFLEN] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static void md5_calc(uint8 *, md5_ctxt *); + +void +md5_init(md5_ctxt * ctxt) +{ + ctxt->md5_n = 0; + ctxt->md5_i = 0; + ctxt->md5_sta = MD5_A0; + ctxt->md5_stb = MD5_B0; + ctxt->md5_stc = MD5_C0; + ctxt->md5_std = MD5_D0; + memset(ctxt->md5_buf, 0, sizeof(ctxt->md5_buf)); +} + +void +md5_loop(md5_ctxt * ctxt, const uint8 *input, unsigned len) +{ + unsigned int gap, + i; + + ctxt->md5_n += len * 8; /* byte to bit */ + gap = MD5_BUFLEN - ctxt->md5_i; + + if (len >= gap) + { + memmove(ctxt->md5_buf + ctxt->md5_i, input, gap); + md5_calc(ctxt->md5_buf, ctxt); + + for (i = gap; i + MD5_BUFLEN <= len; i += MD5_BUFLEN) + md5_calc((uint8 *) (input + i), ctxt); + + ctxt->md5_i = len - i; + memmove(ctxt->md5_buf, input + i, ctxt->md5_i); + } + else + { + memmove(ctxt->md5_buf + ctxt->md5_i, input, len); + ctxt->md5_i += len; + } +} + +void +md5_pad(md5_ctxt * ctxt) +{ + unsigned 
int gap; + + /* Don't count up padding. Keep md5_n. */ + gap = MD5_BUFLEN - ctxt->md5_i; + if (gap > 8) + { + memmove(ctxt->md5_buf + ctxt->md5_i, md5_paddat, + gap - sizeof(ctxt->md5_n)); + } + else + { + /* including gap == 8 */ + memmove(ctxt->md5_buf + ctxt->md5_i, md5_paddat, gap); + md5_calc(ctxt->md5_buf, ctxt); + memmove(ctxt->md5_buf, md5_paddat + gap, + MD5_BUFLEN - sizeof(ctxt->md5_n)); + } + + /* 8 byte word */ +#if BYTE_ORDER == LITTLE_ENDIAN + memmove(&ctxt->md5_buf[56], &ctxt->md5_n8[0], 8); +#endif +#if BYTE_ORDER == BIG_ENDIAN + ctxt->md5_buf[56] = ctxt->md5_n8[7]; + ctxt->md5_buf[57] = ctxt->md5_n8[6]; + ctxt->md5_buf[58] = ctxt->md5_n8[5]; + ctxt->md5_buf[59] = ctxt->md5_n8[4]; + ctxt->md5_buf[60] = ctxt->md5_n8[3]; + ctxt->md5_buf[61] = ctxt->md5_n8[2]; + ctxt->md5_buf[62] = ctxt->md5_n8[1]; + ctxt->md5_buf[63] = ctxt->md5_n8[0]; +#endif + + md5_calc(ctxt->md5_buf, ctxt); +} + +void +md5_result(uint8 *digest, md5_ctxt * ctxt) +{ + /* 4 byte words */ +#if BYTE_ORDER == LITTLE_ENDIAN + memmove(digest, &ctxt->md5_st8[0], 16); +#endif +#if BYTE_ORDER == BIG_ENDIAN + digest[0] = ctxt->md5_st8[3]; + digest[1] = ctxt->md5_st8[2]; + digest[2] = ctxt->md5_st8[1]; + digest[3] = ctxt->md5_st8[0]; + digest[4] = ctxt->md5_st8[7]; + digest[5] = ctxt->md5_st8[6]; + digest[6] = ctxt->md5_st8[5]; + digest[7] = ctxt->md5_st8[4]; + digest[8] = ctxt->md5_st8[11]; + digest[9] = ctxt->md5_st8[10]; + digest[10] = ctxt->md5_st8[9]; + digest[11] = ctxt->md5_st8[8]; + digest[12] = ctxt->md5_st8[15]; + digest[13] = ctxt->md5_st8[14]; + digest[14] = ctxt->md5_st8[13]; + digest[15] = ctxt->md5_st8[12]; +#endif +} + +#if BYTE_ORDER == BIG_ENDIAN +static uint32 X[16]; +#endif + +static void +md5_calc(uint8 *b64, md5_ctxt * ctxt) +{ + uint32 A = ctxt->md5_sta; + uint32 B = ctxt->md5_stb; + uint32 C = ctxt->md5_stc; + uint32 D = ctxt->md5_std; + +#if BYTE_ORDER == LITTLE_ENDIAN + uint32 *X = (uint32 *) b64; +#endif +#if BYTE_ORDER == BIG_ENDIAN + /* 4 byte words */ + /* what a 
brute force but fast! */ + uint8 *y = (uint8 *) X; + + y[0] = b64[3]; + y[1] = b64[2]; + y[2] = b64[1]; + y[3] = b64[0]; + y[4] = b64[7]; + y[5] = b64[6]; + y[6] = b64[5]; + y[7] = b64[4]; + y[8] = b64[11]; + y[9] = b64[10]; + y[10] = b64[9]; + y[11] = b64[8]; + y[12] = b64[15]; + y[13] = b64[14]; + y[14] = b64[13]; + y[15] = b64[12]; + y[16] = b64[19]; + y[17] = b64[18]; + y[18] = b64[17]; + y[19] = b64[16]; + y[20] = b64[23]; + y[21] = b64[22]; + y[22] = b64[21]; + y[23] = b64[20]; + y[24] = b64[27]; + y[25] = b64[26]; + y[26] = b64[25]; + y[27] = b64[24]; + y[28] = b64[31]; + y[29] = b64[30]; + y[30] = b64[29]; + y[31] = b64[28]; + y[32] = b64[35]; + y[33] = b64[34]; + y[34] = b64[33]; + y[35] = b64[32]; + y[36] = b64[39]; + y[37] = b64[38]; + y[38] = b64[37]; + y[39] = b64[36]; + y[40] = b64[43]; + y[41] = b64[42]; + y[42] = b64[41]; + y[43] = b64[40]; + y[44] = b64[47]; + y[45] = b64[46]; + y[46] = b64[45]; + y[47] = b64[44]; + y[48] = b64[51]; + y[49] = b64[50]; + y[50] = b64[49]; + y[51] = b64[48]; + y[52] = b64[55]; + y[53] = b64[54]; + y[54] = b64[53]; + y[55] = b64[52]; + y[56] = b64[59]; + y[57] = b64[58]; + y[58] = b64[57]; + y[59] = b64[56]; + y[60] = b64[63]; + y[61] = b64[62]; + y[62] = b64[61]; + y[63] = b64[60]; +#endif + + ROUND1(A, B, C, D, 0, Sa, 1); + ROUND1(D, A, B, C, 1, Sb, 2); + ROUND1(C, D, A, B, 2, Sc, 3); + ROUND1(B, C, D, A, 3, Sd, 4); + ROUND1(A, B, C, D, 4, Sa, 5); + ROUND1(D, A, B, C, 5, Sb, 6); + ROUND1(C, D, A, B, 6, Sc, 7); + ROUND1(B, C, D, A, 7, Sd, 8); + ROUND1(A, B, C, D, 8, Sa, 9); + ROUND1(D, A, B, C, 9, Sb, 10); + ROUND1(C, D, A, B, 10, Sc, 11); + ROUND1(B, C, D, A, 11, Sd, 12); + ROUND1(A, B, C, D, 12, Sa, 13); + ROUND1(D, A, B, C, 13, Sb, 14); + ROUND1(C, D, A, B, 14, Sc, 15); + ROUND1(B, C, D, A, 15, Sd, 16); + + ROUND2(A, B, C, D, 1, Se, 17); + ROUND2(D, A, B, C, 6, Sf, 18); + ROUND2(C, D, A, B, 11, Sg, 19); + ROUND2(B, C, D, A, 0, Sh, 20); + ROUND2(A, B, C, D, 5, Se, 21); + ROUND2(D, A, B, C, 10, Sf, 22); + ROUND2(C, 
D, A, B, 15, Sg, 23); + ROUND2(B, C, D, A, 4, Sh, 24); + ROUND2(A, B, C, D, 9, Se, 25); + ROUND2(D, A, B, C, 14, Sf, 26); + ROUND2(C, D, A, B, 3, Sg, 27); + ROUND2(B, C, D, A, 8, Sh, 28); + ROUND2(A, B, C, D, 13, Se, 29); + ROUND2(D, A, B, C, 2, Sf, 30); + ROUND2(C, D, A, B, 7, Sg, 31); + ROUND2(B, C, D, A, 12, Sh, 32); + + ROUND3(A, B, C, D, 5, Si, 33); + ROUND3(D, A, B, C, 8, Sj, 34); + ROUND3(C, D, A, B, 11, Sk, 35); + ROUND3(B, C, D, A, 14, Sl, 36); + ROUND3(A, B, C, D, 1, Si, 37); + ROUND3(D, A, B, C, 4, Sj, 38); + ROUND3(C, D, A, B, 7, Sk, 39); + ROUND3(B, C, D, A, 10, Sl, 40); + ROUND3(A, B, C, D, 13, Si, 41); + ROUND3(D, A, B, C, 0, Sj, 42); + ROUND3(C, D, A, B, 3, Sk, 43); + ROUND3(B, C, D, A, 6, Sl, 44); + ROUND3(A, B, C, D, 9, Si, 45); + ROUND3(D, A, B, C, 12, Sj, 46); + ROUND3(C, D, A, B, 15, Sk, 47); + ROUND3(B, C, D, A, 2, Sl, 48); + + ROUND4(A, B, C, D, 0, Sm, 49); + ROUND4(D, A, B, C, 7, Sn, 50); + ROUND4(C, D, A, B, 14, So, 51); + ROUND4(B, C, D, A, 5, Sp, 52); + ROUND4(A, B, C, D, 12, Sm, 53); + ROUND4(D, A, B, C, 3, Sn, 54); + ROUND4(C, D, A, B, 10, So, 55); + ROUND4(B, C, D, A, 1, Sp, 56); + ROUND4(A, B, C, D, 8, Sm, 57); + ROUND4(D, A, B, C, 15, Sn, 58); + ROUND4(C, D, A, B, 6, So, 59); + ROUND4(B, C, D, A, 13, Sp, 60); + ROUND4(A, B, C, D, 4, Sm, 61); + ROUND4(D, A, B, C, 11, Sn, 62); + ROUND4(C, D, A, B, 2, So, 63); + ROUND4(B, C, D, A, 9, Sp, 64); + + ctxt->md5_sta += A; + ctxt->md5_stb += B; + ctxt->md5_stc += C; + ctxt->md5_std += D; +} + +/* vi: set ts=4: */ diff --git a/src/md5.h b/src/md5.h new file mode 100644 index 0000000..58fa491 --- /dev/null +++ b/src/md5.h @@ -0,0 +1,82 @@ +/* $PostgreSQL: pgsql/contrib/pgcrypto/md5.h,v 1.9 2005/10/15 02:49:06 momjian Exp $ */ +/* $KAME: md5.h,v 1.3 2000/02/22 14:01:18 itojun Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _NETINET6_MD5_H_ +#define _NETINET6_MD5_H_ + +#define MD5_BUFLEN 64 +#define MD5_DIGEST_LENGTH 16 + +typedef struct +{ + union + { + uint32 md5_state32[4]; + uint8 md5_state8[16]; + } md5_st; + +#define md5_sta md5_st.md5_state32[0] +#define md5_stb md5_st.md5_state32[1] +#define md5_stc md5_st.md5_state32[2] +#define md5_std md5_st.md5_state32[3] +#define md5_st8 md5_st.md5_state8 + + union + { + uint64 md5_count64; + uint8 md5_count8[8]; + } md5_count; +#define md5_n md5_count.md5_count64 +#define md5_n8 md5_count.md5_count8 + + unsigned int md5_i; + uint8 md5_buf[MD5_BUFLEN]; +} md5_ctxt; + +extern void md5_init(md5_ctxt *); +extern void md5_loop(md5_ctxt *, const uint8 *, unsigned int); +extern void md5_pad(md5_ctxt *); +extern void md5_result(uint8 *, md5_ctxt *); + +/* compatibility with OpenSSL */ +#define MD5_CTX md5_ctxt +#define MD5_Init(x) md5_init((x)) +#define MD5_Update(x, y, z) md5_loop((x), (void*)(y), (z)) +#define MD5_Final(x, y) \ +do { \ + md5_pad((y)); \ + md5_result((x), (y)); \ +} while (0) + +#endif /* ! _NETINET6_MD5_H_ */ + +/* vi: set ts=4: */ diff --git a/src/objects.c b/src/objects.c new file mode 100644 index 0000000..e84445e --- /dev/null +++ b/src/objects.c @@ -0,0 +1,931 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Herding objects between lists happens here. + */ + +#include "bouncer.h" + +/* those items will be allocated as needed, never freed */ +STATLIST(user_list); +STATLIST(database_list); +STATLIST(pool_list); + +/* + * client and server objects will be pre-allocated + * they are always in either active or free lists + * in addition to others. + */ +STATLIST(free_client_list); +STATLIST(free_server_list); +STATLIST(login_client_list); + +/* how many client sockets are allocated */ +static int absolute_client_count = 0; +/* how many server sockets are allocated */ +static int absolute_server_count = 0; + +/* list of users ordered by name */ +static PgUser **user_lookup = NULL; + +/* drop lookup list because it will be out of sync */ +static void reset_auth_cache(void) +{ + if (user_lookup != NULL) { + free(user_lookup); + user_lookup = NULL; + } +} + +/* fast way to get number of active clients */ +int get_active_client_count(void) +{ + return absolute_client_count - statlist_count(&free_client_list); +} + +/* fast way to get number of active servers */ +int get_active_server_count(void) +{ + return absolute_server_count - statlist_count(&free_server_list); +} + +/* this should be called on free socket that is put into use */ +static void clean_socket(PgSocket *sk) +{ + sk->link = NULL; + sk->pool = NULL; + + sk->wait_for_welcome = 0; + sk->ready = 0; + sk->flush_req = 0; + sk->admin_user = 0; + sk->own_user = 0; + sk->suspended = 0; + sk->wait_for_response = 0; + + sk->connect_time = 0; + sk->request_time = 0; + sk->query_start = 0; + + sk->auth_user = NULL; +} + +/* allocate & fll client socket */ +static PgSocket 
*new_client(void) +{ + PgSocket *client; + + /* get free PgSocket */ + client = first_socket(&free_client_list); + if (client) { + clean_socket(client); + return client; + } + + client = zmalloc(sizeof(*client) + cf_sbuf_len); + if (!client) + return NULL; + + list_init(&client->head); + sbuf_init(&client->sbuf, client_proto, client); + statlist_prepend(&client->head, &free_client_list); + client->state = CL_FREE; + + absolute_client_count++; + + return client; +} + +/* allocate & fill server socket */ +static PgSocket *new_server(void) +{ + PgSocket *server; + + /* get free PgSocket */ + server = first_socket(&free_server_list); + if (server) { + clean_socket(server); + return server; + } + + server = zmalloc(sizeof(*server) + cf_sbuf_len); + if (!server) + return NULL; + + list_init(&server->head); + sbuf_init(&server->sbuf, server_proto, server); + statlist_prepend(&server->head, &free_server_list); + server->state = SV_FREE; + + absolute_server_count++; + + return server; +} + +/* state change means moving between lists */ +void change_client_state(PgSocket *client, SocketState newstate) +{ + PgPool *pool = client->pool; + + /* remove from old location */ + switch (client->state) { + case CL_FREE: + statlist_remove(&client->head, &free_client_list); + break; + case CL_LOGIN: + statlist_remove(&client->head, &login_client_list); + break; + case CL_WAITING: + statlist_remove(&client->head, &pool->waiting_client_list); + break; + case CL_ACTIVE: + statlist_remove(&client->head, &pool->active_client_list); + break; + case CL_CANCEL: + statlist_remove(&client->head, &pool->cancel_req_list); + break; + default: + fatal("bad cur client state: %d", client->state); + } + + client->state = newstate; + + /* put to new location */ + switch (client->state) { + case CL_FREE: + /* use LIFO the keep cache warm */ + statlist_prepend(&client->head, &free_client_list); + break; + case CL_LOGIN: + statlist_append(&client->head, &login_client_list); + break; + case CL_WAITING: + 
statlist_append(&client->head, &pool->waiting_client_list); + break; + case CL_ACTIVE: + statlist_append(&client->head, &pool->active_client_list); + break; + case CL_CANCEL: + statlist_append(&client->head, &pool->cancel_req_list); + break; + default: + fatal("bad new client state: %d", client->state); + } +} + +/* state change means moving between lists */ +void change_server_state(PgSocket *server, SocketState newstate) +{ + PgPool *pool = server->pool; + + /* remove from old location */ + switch (server->state) { + case SV_FREE: + statlist_remove(&server->head, &free_server_list); + break; + case SV_LOGIN: + statlist_remove(&server->head, &pool->new_server_list); + break; + case SV_USED: + statlist_remove(&server->head, &pool->used_server_list); + break; + case SV_TESTED: + statlist_remove(&server->head, &pool->tested_server_list); + break; + case SV_IDLE: + statlist_remove(&server->head, &pool->idle_server_list); + break; + case SV_ACTIVE: + statlist_remove(&server->head, &pool->active_server_list); + break; + default: + fatal("change_server_state: bad old server state: %d", server->state); + } + + server->state = newstate; + + /* put to new location */ + switch (server->state) { + case SV_FREE: + /* use LIFO the keep cache warm */ + statlist_prepend(&server->head, &free_server_list); + break; + case SV_LOGIN: + statlist_append(&server->head, &pool->new_server_list); + break; + case SV_USED: + /* again, LIFO */ + statlist_prepend(&server->head, &pool->used_server_list); + break; + case SV_TESTED: + statlist_append(&server->head, &pool->tested_server_list); + break; + case SV_IDLE: + if (server->close_needed) + /* try to avoid immidiate usage then */ + statlist_append(&server->head, &pool->idle_server_list); + else + /* otherwise use LIFO */ + statlist_prepend(&server->head, &pool->idle_server_list); + break; + case SV_ACTIVE: + statlist_append(&server->head, &pool->active_server_list); + break; + default: + fatal("bad server state"); + } +} + +/* compare pool 
names, for use with put_in_order */ +static int cmp_pool(List *i1, List *i2) +{ + PgPool *p1 = container_of(i1, PgPool, head); + PgPool *p2 = container_of(i2, PgPool, head); + if (p1->db != p2->db) + return strcmp(p1->db->name, p2->db->name); + if (p1->user != p2->user) + return strcmp(p1->user->name, p2->user->name); + return 0; +} + +/* compare user names, for use with put_in_order */ +static int cmp_user(List *i1, List *i2) +{ + PgUser *u1 = container_of(i1, PgUser, head); + PgUser *u2 = container_of(i2, PgUser, head); + return strcmp(u1->name, u2->name); +} + +/* compare db names, for use with put_in_order */ +static int cmp_database(List *i1, List *i2) +{ + PgDatabase *db1 = container_of(i1, PgDatabase, head); + PgDatabase *db2 = container_of(i2, PgDatabase, head); + return strcmp(db1->name, db2->name); +} + +/* put elem into list in correct pos */ +static void put_in_order(List *newitem, StatList *list, int (*cmpfn)(List *, List *)) +{ + int res; + List *item; + + statlist_for_each(item, list) { + res = cmpfn(item, newitem); + if (res == 0) + fatal("put_in_order: found existing elem"); + else if (res > 0) { + statlist_put_before(newitem, list, item); + return; + } + } + statlist_append(newitem, list); +} + +/* create new object if new, then return it */ +PgDatabase *add_database(const char *name) +{ + PgDatabase *db = find_database(name); + + /* create new object if needed */ + if (db == NULL) { + db = zmalloc(sizeof(*db)); + if (!db) + return NULL; + + list_init(&db->head); + strlcpy(db->name, name, sizeof(db->name)); + put_in_order(&db->head, &database_list, cmp_database); + } + + return db; +} + +/* add or update client users */ +PgUser *add_user(const char *name, const char *passwd) +{ + PgUser *user = find_user(name); + + reset_auth_cache(); + + if (user == NULL) { + user = zmalloc(sizeof(*user)); + if (!user) + return NULL; + + list_init(&user->head); + list_init(&user->pool_list); + strlcpy(user->name, name, sizeof(user->name)); + 
put_in_order(&user->head, &user_list, cmp_user); + } + strlcpy(user->passwd, passwd, sizeof(user->passwd)); + return user; +} + +/* create separate user object for storing server user info */ +PgUser *force_user(PgDatabase *db, const char *name, const char *passwd) +{ + PgUser *user = db->forced_user; + if (!user) { + user = zmalloc(sizeof(*user)); + if (!user) + return NULL; + list_init(&user->head); + list_init(&user->pool_list); + } + strlcpy(user->name, name, sizeof(user->name)); + strlcpy(user->passwd, passwd, sizeof(user->passwd)); + db->forced_user = user; + return user; +} + +/* find a existing database */ +PgDatabase *find_database(const char *name) +{ + List *item; + PgDatabase *db; + statlist_for_each(item, &database_list) { + db = container_of(item, PgDatabase, head); + if (strcmp(db->name, name) == 0) + return db; + } + return NULL; +} + +/* compare string with PgUser->name, for usage with bsearch() */ +static int user_name_cmp(const void *namestr, const void *userptr) +{ + const PgUser * const *user_p = userptr; + const PgUser *user = *user_p; + return strcmp(namestr, user->name); +} + +/* find existing user */ +PgUser *find_user(const char *name) +{ + List *item; + PgUser *user; + + /* if lookup table is available, use faster method */ + if (user_lookup) { + PgUser **res; + res = bsearch(name, user_lookup, + statlist_count(&user_list), + sizeof(PgUser *), + user_name_cmp); + return res ? 
*res : NULL; + } + + /* slow lookup */ + statlist_for_each(item, &user_list) { + user = container_of(item, PgUser, head); + if (strcmp(user->name, name) == 0) + return user; + } + return NULL; +} + +/* create lookup list */ +void create_auth_cache(void) +{ + int i = 0; + List *item; + PgUser *user; + + reset_auth_cache(); + + user_lookup = malloc(sizeof(PgUser *) * statlist_count(&user_list)); + if (!user_lookup) + return; + + statlist_for_each(item, &user_list) { + user = container_of(item, PgUser, head); + user_lookup[i++] = user; + } +} + +/* create new pool object */ +static PgPool *new_pool(PgDatabase *db, PgUser *user) +{ + PgPool *pool; + + pool = zmalloc(sizeof(*pool)); + if (!pool) + return NULL; + + list_init(&pool->head); + list_init(&pool->map_head); + + pool->user = user; + pool->db = db; + + statlist_init(&pool->active_client_list, "active_client_list"); + statlist_init(&pool->waiting_client_list, "waiting_client_list"); + statlist_init(&pool->active_server_list, "active_server_list"); + statlist_init(&pool->idle_server_list, "idle_server_list"); + statlist_init(&pool->tested_server_list, "tested_server_list"); + statlist_init(&pool->used_server_list, "used_server_list"); + statlist_init(&pool->new_server_list, "new_server_list"); + statlist_init(&pool->cancel_req_list, "cancel_req_list"); + + list_append(&pool->map_head, &user->pool_list); + + /* keep pools in db/user order to make stats faster */ + put_in_order(&pool->head, &pool_list, cmp_pool); + + return pool; +} + +/* find pool object, create if needed */ +PgPool *get_pool(PgDatabase *db, PgUser *user) +{ + List *item; + PgPool *pool; + + if (!db || !user) + return NULL; + + list_for_each(item, &user->pool_list) { + pool = container_of(item, PgPool, map_head); + if (pool->db == db) + return pool; + } + + return new_pool(db, user); +} + +/* deactivate socket and put into wait queue */ +void pause_client(PgSocket *client) +{ + Assert(client->state == CL_ACTIVE); + + slog_debug(client, 
"pause_client"); + change_client_state(client, CL_WAITING); + sbuf_pause(&client->sbuf); +} + +/* wake client from wait */ +void activate_client(PgSocket *client) +{ + Assert(client->state == CL_WAITING); + + slog_debug(client, "activate_client"); + change_client_state(client, CL_ACTIVE); + sbuf_continue(&client->sbuf); +} + +/* link if found, otherwise put into wait queue */ +bool find_server(PgSocket *client) +{ + PgPool *pool = client->pool; + PgSocket *server; + bool res; + + Assert(client->state == CL_ACTIVE); + + if (client->link) + return true; + + /* try to get idle server, if allowed */ + if (cf_pause_mode == 1) + server = NULL; + else + server = first_socket(&pool->idle_server_list); + + /* link or send to waiters list */ + if (server) { + Assert(server->state == SV_IDLE); + client->link = server; + server->link = client; + change_server_state(server, SV_ACTIVE); + res = true; + } else { + pause_client(client); + Assert(client->state == CL_WAITING); + res = false; + } + return res; +} + +/* connecting/active -> idle, unlink if needed */ +void release_server(PgSocket *server) +{ + PgPool *pool = server->pool; + SocketState newstate = SV_IDLE; + + /* btw, this function is not allowed to disconnect, + as there may be packet pending */ + Assert(server->ready); + + /* remove from old list */ + switch (server->state) { + case SV_ACTIVE: + server->link->link = NULL; + server->link = NULL; + + if (cf_server_check_delay == 0 && *cf_server_check_query) + newstate = SV_USED; + case SV_USED: + case SV_TESTED: + break; + case SV_LOGIN: + pool->last_connect_failed = 0; + break; + default: + fatal("bad server state in release_server"); + } + + Assert(server->link == NULL); + + log_debug("release_server: new state=%d", newstate); + + change_server_state(server, newstate); +} + +/* drop server connection */ +void disconnect_server(PgSocket *server, bool notify, const char *reason) +{ + PgPool *pool = server->pool; + PgSocket *client = server->link; + static const uint8 
pkt_term[] = {'X', 0,0,0,4}; + int send_term = 1; + + log_debug("disconnect_server"); + slog_info(server, "closing because: %s", reason); + + switch (server->state) { + case SV_ACTIVE: + client = server->link; + if (client) { + client->link = NULL; + server->link = NULL; + disconnect_client(client, true, reason); + } + break; + case SV_TESTED: + case SV_USED: + case SV_IDLE: + break; + case SV_LOGIN: + /* + * usually disconnect means problems in startup phase, + * except when sending cancel packet + */ + if (!server->ready) + pool->last_connect_failed = 1; + else + send_term = 0; + break; + default: + fatal("disconnect_server: bad server state"); + } + + Assert(server->link == NULL); + + /* notify server and close connection */ + if (send_term && notify) + sbuf_answer(&server->sbuf, pkt_term, sizeof(pkt_term)); + sbuf_close(&server->sbuf); + + change_server_state(server, SV_FREE); +} + +/* drop client connection */ +void disconnect_client(PgSocket *client, bool notify, const char *reason) +{ + slog_debug(client, "closing because: %s", reason); + + switch (client->state) { + case CL_ACTIVE: + if (client->link) { + PgSocket *server = client->link; + if (server->ready) { + release_server(server); + } else { + server->link = NULL; + client->link = NULL; + disconnect_server(server, true, "unclean server"); + } + } + case CL_LOGIN: + case CL_WAITING: + case CL_CANCEL: + break; + default: + fatal("bad client state in disconnect_client: %d", client->state); + } + + /* send reason to client */ + if (notify && reason) { + /* + * dont send Ready pkt here, or client wont notice + * closed connection + */ + send_pooler_error(client, false, reason); + } + + sbuf_close(&client->sbuf); + + change_client_state(client, CL_FREE); +} + +/* the pool needs new connection, if possible */ +void launch_new_connection(PgPool *pool) +{ + PgSocket *server; + int total; + + /* allow only small number of connection attempts at a time */ + if (!statlist_empty(&pool->new_server_list)) { + 
log_debug("launch_new_connection: already progress"); + return; + } + + /* if server bounces, dont retry too fast */ + if (pool->last_connect_failed) { + usec_t now = get_cached_time(); + if (now - pool->last_connect_time < cf_server_login_retry) { + log_debug("launch_new_connection: last failed, wait"); + return; + } + } + + /* is it allowed to add servers? */ + total = pool_server_count(pool); + if (total >= pool->db->pool_size && pool->db->welcome_msg_ready) { + log_debug("launch_new_connection: pool full (%d >= %d)", + total, pool->db->pool_size); + return; + } + + /* get free conn object */ + server = new_server(); + if (!server) { + log_debug("launch_new_connection: no mem"); + return; + } + + /* initialize it */ + server->pool = pool; + server->auth_user = server->pool->user; + server->addr = server->pool->db->addr; + server->connect_time = get_cached_time(); + pool->last_connect_time = get_cached_time(); + change_server_state(server, SV_LOGIN); + + /* start connecting */ + slog_info(server, "new connection to server"); + sbuf_connect(&server->sbuf, &server->addr, cf_server_connect_timeout / USEC); +} + +/* new client connection attempt */ +PgSocket * accept_client(int sock, + const struct sockaddr_in *addr, + bool is_unix) +{ + PgSocket *client; + + /* get free PgSocket */ + client = new_client(); + if (!client) + return NULL; + + client->connect_time = client->request_time = get_cached_time(); + client->query_start = 0; + + if (addr) { + client->addr.ip_addr = addr->sin_addr; + client->addr.port = ntohs(addr->sin_port); + } else { + memset(&client->addr, 0, sizeof(client->addr)); + } + client->addr.is_unix = is_unix; + change_client_state(client, CL_LOGIN); + + slog_debug(client, "got connection attempt"); + sbuf_accept(&client->sbuf, sock, is_unix); + + return client; +} + +/* send cached parameters to client to pretend being server */ +/* client managed to authenticate, send welcome msg and accept queries */ +bool finish_client_login(PgSocket *client) +{ 
+ switch (client->state) { + case CL_LOGIN: + change_client_state(client, CL_ACTIVE); + case CL_ACTIVE: + break; + default: + fatal("bad client state"); + } + + if (!welcome_client(client)) { + log_debug("finish_client_login: no welcome msg, pause"); + client->wait_for_welcome = 1; + pause_client(client); + if (!cf_pause_mode) + launch_new_connection(client->pool); + return false; + } + client->wait_for_welcome = 0; + + slog_debug(client, "logged in"); + return true; +} + +/* client->cancel_key has requested client key */ +void accept_cancel_request(PgSocket *req) +{ + List *pitem, *citem; + PgPool *pool; + PgSocket *server = NULL, *client, *main_client = NULL; + + Assert(req->state == CL_LOGIN); + + /* find real client this is for */ + statlist_for_each(pitem, &pool_list) { + pool = container_of(pitem, PgPool, head); + statlist_for_each(citem, &pool->active_client_list) { + client = container_of(citem, PgSocket, head); + if (memcmp(client->cancel_key, req->cancel_key, 8) == 0) { + main_client = client; + break; + } + } + } + + /* wrong key */ + if (!main_client) { + disconnect_client(req, false, "failed cancel req"); + return; + } + + /* not linked client, just drop it then */ + if (!main_client->link) { + disconnect_client(main_client, true, "canceling idle client"); + disconnect_client(req, false, "cancel req for idle client"); + return; + } + + /* drop the connection silently */ + sbuf_close(&req->sbuf); + + /* remember server key */ + server = main_client->link; + memcpy(req->cancel_key, server->cancel_key, 8); + statlist_remove(&req->head, &login_client_list); + statlist_append(&req->head, &pool->cancel_req_list); + req->state = CL_CANCEL; + + launch_new_connection(pool); +} + +void forward_cancel_request(PgSocket *server) +{ + bool res; + PgSocket *req = first_socket(&server->pool->cancel_req_list); + + Assert(req != NULL && req->state == CL_CANCEL); + Assert(server->state == SV_LOGIN); + + SEND_CancelRequest(res, server, req->cancel_key); + + 
change_client_state(req, CL_FREE); +} + +bool use_client_socket(int fd, PgAddr *addr, + const char *dbname, const char *username, + uint64 ckey, int oldfd, int linkfd) +{ + PgDatabase *db = find_database(dbname); + PgUser *user = find_user(username); + PgPool *pool = get_pool(db, user); + PgSocket *client; + PktBuf tmp; + + if (!pool) + return false; + + client = accept_client(fd, NULL, addr->is_unix); + client->addr = *addr; + client->suspended = 1; + + if (!set_pool(client, dbname, username)) + return false; + + change_client_state(client, CL_ACTIVE); + + /* store old cancel key */ + pktbuf_static(&tmp, client->cancel_key, 8); + pktbuf_put_uint64(&tmp, ckey); + + /* store old fds */ + client->tmp_sk_oldfd = oldfd; + client->tmp_sk_linkfd = linkfd; + + return true; +} + +bool use_server_socket(int fd, PgAddr *addr, + const char *dbname, const char *username, + uint64 ckey, int oldfd, int linkfd) +{ + PgDatabase *db = find_database(dbname); + PgUser *user; + PgPool *pool; + PgSocket *server; + PktBuf tmp; + + if (db->forced_user) + user = db->forced_user; + else + user = find_user(username); + + pool = get_pool(db, user); + if (!pool) + return false; + + server = new_server(); + if (!server) + return false; + + sbuf_accept(&server->sbuf, fd, addr->is_unix); + server->suspended = 1; + server->pool = pool; + server->auth_user = user; + server->addr = *addr; + server->connect_time = server->request_time = get_cached_time(); + server->query_start = 0; + + if (linkfd) + change_server_state(server, SV_ACTIVE); + else + change_server_state(server, SV_IDLE); + + /* store old cancel key */ + pktbuf_static(&tmp, server->cancel_key, 8); + pktbuf_put_uint64(&tmp, ckey); + + /* store old fds */ + server->tmp_sk_oldfd = oldfd; + server->tmp_sk_linkfd = linkfd; + + return true; +} + +void for_each_server(PgPool *pool, void (*func)(PgSocket *sk)) +{ + List *item; + + statlist_for_each(item, &pool->idle_server_list) + func(container_of(item, PgSocket, head)); + + 
statlist_for_each(item, &pool->used_server_list) + func(container_of(item, PgSocket, head)); + + statlist_for_each(item, &pool->tested_server_list) + func(container_of(item, PgSocket, head)); + + statlist_for_each(item, &pool->active_server_list) + func(container_of(item, PgSocket, head)); + + statlist_for_each(item, &pool->new_server_list) + func(container_of(item, PgSocket, head)); +} + +static void tag_dirty(PgSocket *sk) +{ + sk->close_needed = 1; +} + +void tag_database_dirty(PgDatabase *db) +{ + List *item; + PgPool *pool; + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + if (pool->db == db) + for_each_server(pool, tag_dirty); + } +} + + diff --git a/src/objects.h b/src/objects.h new file mode 100644 index 0000000..9d95ad5 --- /dev/null +++ b/src/objects.h @@ -0,0 +1,63 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +extern StatList user_list; +extern StatList pool_list; +extern StatList database_list; +extern StatList login_client_list; +extern StatList free_server_list; +extern StatList free_client_list; +extern StatList login_client_list; + +PgDatabase *find_database(const char *name); +PgUser *find_user(const char *name); +PgPool *get_pool(PgDatabase *, PgUser *); +bool find_server(PgSocket *client); +void release_server(PgSocket *server); +bool finish_client_login(PgSocket *client); + +PgSocket * accept_client(int sock, const struct sockaddr_in *addr, bool is_unix); +void disconnect_server(PgSocket *server, bool notify, const char *reason); +void disconnect_client(PgSocket *client, bool notify, const char *reason); + +PgDatabase * add_database(const char *name); +PgUser * add_user(const char *name, const char *passwd); +PgUser * force_user(PgDatabase *db, const char *username, const char *passwd); + +void accept_cancel_request(PgSocket *req); +void forward_cancel_request(PgSocket *server); + +void launch_new_connection(PgPool *pool); + +bool use_client_socket(int fd, PgAddr *addr, const char *dbname, const char *username, uint64 ckey, int oldfd, int linkfd); +bool use_server_socket(int fd, PgAddr *addr, const char *dbname, const char *username, uint64 ckey, int oldfd, int linkfd); + +void pause_client(PgSocket *client); +void activate_client(PgSocket *client); + +void change_client_state(PgSocket *client, SocketState newstate); +void change_server_state(PgSocket *server, SocketState newstate); + +int get_active_client_count(void); +int get_active_server_count(void); + +void tag_database_dirty(PgDatabase *db); +void for_each_server(PgPool *pool, void (*func)(PgSocket *sk)); + +void create_auth_cache(void); + diff --git a/src/pktbuf.c b/src/pktbuf.c new file mode 100644 index 0000000..ba65042 --- /dev/null +++ b/src/pktbuf.c @@ -0,0 +1,405 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. 
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Packet writing and sending.
+ */
+
+#include "bouncer.h"
+
+/*
+ * Release a dynamic PktBuf together with its data buffer and event.
+ * Buffers created by pktbuf_static() are owned by the caller and are
+ * left alone.
+ */
+static void pktbuf_free(PktBuf *buf)
+{
+	if (buf->fixed_buf)
+		return;
+
+	log_debug("pktbuf_free(%p)", buf);
+	if (buf->buf)
+		free(buf->buf);
+	if (buf->ev)
+		free(buf->ev);
+	free(buf);
+}
+
+/*
+ * Allocate a growable PktBuf with start_len bytes of initial space.
+ * Returns NULL if any of the allocations fails.
+ */
+PktBuf *pktbuf_dynamic(int start_len)
+{
+	PktBuf *buf = zmalloc(sizeof(PktBuf));
+	log_debug("pktbuf_dynamic(%d): %p", start_len, buf);
+	if (!buf)
+		return NULL;
+
+	buf->ev = zmalloc(sizeof(*buf->ev));
+	if (!buf->ev) {
+		pktbuf_free(buf);
+		return NULL;
+	}
+	buf->buf = malloc(start_len);
+	if (!buf->buf) {
+		pktbuf_free(buf);
+		return NULL;
+	}
+	buf->buf_len = start_len;
+	return buf;
+}
+
+/*
+ * Initialize a PktBuf over caller-owned memory.  Such a buffer never
+ * grows: a write that does not fit sets buf->failed instead.
+ */
+void pktbuf_static(PktBuf *buf, uint8 *data, int len)
+{
+	memset(buf, 0, sizeof(*buf));
+	buf->buf = data;
+	buf->buf_len = len;
+	buf->fixed_buf = 1;
+}
+
+/*
+ * Send the unsent part of the buffer with a single safe_send() call.
+ * Returns true only if everything was written; a failed buffer is
+ * never sent.
+ */
+bool pktbuf_send_immidiate(PktBuf *buf, PgSocket *sk)
+{
+	int fd = sbuf_socket(&sk->sbuf);
+	uint8 *pos = buf->buf + buf->send_pos;
+	int amount = buf->write_pos - buf->send_pos;
+	int res;
+
+	if (buf->failed)
+		return false;
+	res = safe_send(fd, pos, amount, 0);
+	if (res < 0) {
+		log_error("pktbuf_send_immidiate: %s", strerror(errno));
+	}
+	return res == amount;
+}
+
+/*
+ * Event callback for queued sending: write as much as possible,
+ * re-arm an EV_WRITE event while data remains and free the buffer
+ * once everything is sent or a hard error occurs.
+ */
+static void pktbuf_send_func(int fd, short flags, void *arg)
+{
+	PktBuf *buf = arg;
+	int amount, res;
+
+	log_debug("pktbuf_send_func(%d, %d, %p)", fd, (int)flags, buf);
+
+	if (buf->failed)
+		return;
+
+	amount = buf->write_pos - buf->send_pos;
+	res = safe_send(fd, buf->buf + buf->send_pos, amount, 0);
+	if (res < 0) {
+		/*
+		 * The error code lives in errno; the old test "res == EAGAIN"
+		 * could never be true inside "res < 0", so a full socket
+		 * buffer was treated as a fatal error.
+		 */
+		if (errno == EAGAIN) {
+			res = 0;
+		} else {
+			log_error("pktbuf_send_func: %s", strerror(errno));
+			pktbuf_free(buf);
+			return;
+		}
+	}
+	buf->send_pos += res;
+
+	if (buf->send_pos < buf->write_pos) {
+		event_set(buf->ev, fd, EV_WRITE, pktbuf_send_func, buf);
+		event_add(buf->ev, NULL);
+	} else
+		pktbuf_free(buf);
+}
+
+/*
+ * Start asynchronous sending of a dynamic buffer; the buffer is freed
+ * when done.  If packet construction failed, a pooler error is sent
+ * to sk instead and the buffer is freed immediately.
+ */
+void pktbuf_send_queued(PktBuf *buf, PgSocket *sk)
+{
+	int fd = sbuf_socket(&sk->sbuf);
+
+	Assert(!buf->sending);
+	Assert(!buf->fixed_buf);
+
+	if (buf->failed) {
+		send_pooler_error(sk, true, "result prepare failed");
+		pktbuf_free(buf);
+	} else {
+		buf->sending = 1;
+		pktbuf_send_func(fd, EV_WRITE, buf);
+	}
+}
+
+/*
+ * Ensure len more bytes fit after write_pos.  Dynamic buffers grow by
+ * doubling; fixed buffers, and failed reallocations, set buf->failed.
+ */
+static void make_room(PktBuf *buf, int len)
+{
+	int newlen = buf->buf_len;
+	int need = buf->write_pos + len;
+	void *ptr;
+
+	if (newlen >= need)
+		return;
+
+	if (buf->failed)
+		return;
+
+	if (buf->fixed_buf) {
+		buf->failed = 1;
+		return;
+	}
+
+	/* doubling a zero length would loop forever */
+	if (newlen <= 0)
+		newlen = need;
+
+	while (newlen < need)
+		newlen = newlen * 2;
+
+	log_debug("make_room(%p, %d): realloc newlen=%d",
+		  buf, len, newlen);
+	ptr = realloc(buf->buf, newlen);
+	if (!ptr) {
+		buf->failed = 1;
+	} else {
+		buf->buf = ptr;
+		buf->buf_len = newlen;
+	}
+}
+
+/* append one byte */
+void pktbuf_put_char(PktBuf *buf, char val)
+{
+	make_room(buf, 1);
+	if (buf->failed)
+		return;
+
+	buf->buf[buf->write_pos++] = val;
+}
+
+/* append a 16-bit value, most significant byte first */
+void pktbuf_put_uint16(PktBuf *buf, uint16 val)
+{
+	/* only 2 bytes are written; asking for 4 failed nearly-full static buffers */
+	make_room(buf, 2);
+	if (buf->failed)
+		return;
+
+	buf->buf[buf->write_pos++] = (val >> 8) & 255;
+	buf->buf[buf->write_pos++] = val & 255;
+}
+
+/* append a 32-bit value, most significant byte first */
+void pktbuf_put_uint32(PktBuf *buf, uint32 val)
+{
+	uint8 *pos;
+
+	make_room(buf, 4);
+	if (buf->failed)
+		return;
+
+	pos = buf->buf + buf->write_pos;
+	pos[0] = (val >> 24) & 255;
+	pos[1] = (val >> 16) & 255;
+	pos[2] = (val >> 8) & 255;
+	pos[3] = val & 255;
+	buf->write_pos += 4;
+}
+
+/* append a 64-bit value as two 32-bit halves, high half first */
+void pktbuf_put_uint64(PktBuf *buf, uint64 val)
+{
+	pktbuf_put_uint32(buf, val >> 32);
+	pktbuf_put_uint32(buf, (uint32)val);
+}
+
+/* append len raw bytes */
+void pktbuf_put_bytes(PktBuf *buf, const void *data, int len)
+{
+	make_room(buf, len);
+	if (buf->failed)
+		return;
+	memcpy(buf->buf + buf->write_pos, data, len);
+	buf->write_pos += len;
+}
+
+/* append a C string including its terminating NUL */
+void pktbuf_put_string(PktBuf *buf, const char *str)
+{
+	int len = strlen(str);
+	pktbuf_put_bytes(buf, str, len + 1);
+}
+
+/*
+ * write header, remember pos to write length later.
+ *
+ * Types below 256 produce a type byte followed by the length field;
+ * larger values produce the length field followed by a 32-bit code.
+ */
+void pktbuf_start_packet(PktBuf *buf, int type)
+{
+	if (buf->failed)
+		return;
+
+	if (type < 256) {
+		/* new-style packet */
+		pktbuf_put_char(buf, type);
+		buf->pktlen_pos = buf->write_pos;
+		pktbuf_put_uint32(buf, 0);
+	} else {
+		/* old-style packet */
+		buf->pktlen_pos = buf->write_pos;
+		pktbuf_put_uint32(buf, 0);
+		pktbuf_put_uint32(buf, type);
+	}
+}
+
+/*
+ * Patch the length field reserved by pktbuf_start_packet().  The
+ * length counts from the length field itself to the current write
+ * position.
+ */
+void pktbuf_finish_packet(PktBuf *buf)
+{
+	uint8 *pos;
+	unsigned len;
+
+	if (buf->failed)
+		return;
+
+	len = buf->write_pos - buf->pktlen_pos;
+	pos = buf->buf + buf->pktlen_pos;
+	buf->pktlen_pos = 0;
+
+	*pos++ = (len >> 24) & 255;
+	*pos++ = (len >> 16) & 255;
+	*pos++ = (len >> 8) & 255;
+	*pos++ = len & 255;
+}
+
+/*
+ * Write one complete packet; the varargs are consumed according to
+ * pktdesc.  An unknown descriptor character is fatal.
+ *
+ * types:
+ * c - char/byte
+ * h - uint16
+ * i - uint32
+ * q - uint64
+ * s - Cstring
+ * b - bytes (pointer argument followed by int length)
+ */
+void pktbuf_write_generic(PktBuf *buf, int type, const char *pktdesc, ...)
+{
+	va_list ap;
+	int len;
+	const char *adesc = pktdesc;
+	uint8 *bin;
+
+	pktbuf_start_packet(buf, type);
+
+	va_start(ap, pktdesc);
+	while (*adesc) {
+		switch (*adesc) {
+		case 'c':
+			pktbuf_put_char(buf, va_arg(ap, int));
+			break;
+		case 'h':
+			pktbuf_put_uint16(buf, va_arg(ap, int));
+			break;
+		case 'i':
+			pktbuf_put_uint32(buf, va_arg(ap, int));
+			break;
+		case 'q':
+			pktbuf_put_uint64(buf, va_arg(ap, uint64));
+			break;
+		case 's':
+			pktbuf_put_string(buf, va_arg(ap, char *));
+			break;
+		case 'b':
+			bin = va_arg(ap, uint8 *);
+			len = va_arg(ap, int);
+			pktbuf_put_bytes(buf, bin, len);
+			break;
+		default:
+			fatal("bad pktdesc: %s", pktdesc);
+		}
+		adesc++;
+	}
+	va_end(ap);
+
+	/* set correct length */
+	pktbuf_finish_packet(buf);
+}
+
+
+/* send resultset column info
+ *
+ * One char * column name is expected in the varargs per tupdesc char.
+ *
+ * tupdesc keys:
+ * 'i' - int4
+ * 'q' - int8
+ * 's' - string
+ * 'T' - usec_t to date
+ */
+void pktbuf_write_RowDescription(PktBuf *buf, const char *tupdesc, ...)
+{
+	va_list ap;
+	char *name;
+	int i, ncol = strlen(tupdesc);
+
+	log_noise("write RowDescription");
+
+	pktbuf_start_packet(buf, 'T');
+
+	pktbuf_put_uint16(buf, ncol);
+
+	va_start(ap, tupdesc);
+	for (i = 0; i < ncol; i++) {
+		name = va_arg(ap, char *);
+
+		/* Fields: name, reloid, colnr, oid, typsize, typmod, fmt */
+		pktbuf_put_string(buf, name);
+		pktbuf_put_uint32(buf, 0);
+		pktbuf_put_uint16(buf, 0);
+		if (tupdesc[i] == 's') {
+			pktbuf_put_uint32(buf, TEXTOID);
+			pktbuf_put_uint16(buf, -1);
+		} else if (tupdesc[i] == 'i') {
+			pktbuf_put_uint32(buf, INT4OID);
+			pktbuf_put_uint16(buf, 4);
+		} else if (tupdesc[i] == 'q') {
+			pktbuf_put_uint32(buf, INT8OID);
+			pktbuf_put_uint16(buf, 8);
+		} else if (tupdesc[i] == 'T') {
+			/* timestamps are sent as text */
+			pktbuf_put_uint32(buf, TEXTOID);
+			pktbuf_put_uint16(buf, -1);
+		} else
+			fatal("bad tupdesc");
+		pktbuf_put_uint32(buf, 0);
+		pktbuf_put_uint16(buf, 0);
+	}
+	va_end(ap);
+
+	/* set correct length */
+	pktbuf_finish_packet(buf);
+}
+
+/*
+ * send DataRow.
+ *
+ * tupdesc keys:
+ * 'i' - int4
+ * 'q' - int8
+ * 's' - string
+ * 'T' - usec_t to date
+ *
+ * One vararg is consumed per tupdesc char.  Every value is written
+ * as text; a NULL string is written as length -1 with no data.
+ */
+void pktbuf_write_DataRow(PktBuf *buf, const char *tupdesc, ...)
+{
+	char tmp[32];
+	const char *val = NULL;
+	int i, len, ncol = strlen(tupdesc);
+	va_list ap;
+
+	pktbuf_start_packet(buf, 'D');
+	pktbuf_put_uint16(buf, ncol);
+
+	va_start(ap, tupdesc);
+	for (i = 0; i < ncol; i++) {
+		/* convert the next argument to its text form */
+		if (tupdesc[i] == 'i') {
+			sprintf(tmp, "%d", va_arg(ap, int));
+			val = tmp;
+		} else if (tupdesc[i] == 'q') {
+			sprintf(tmp, "%llu", (unsigned long long)va_arg(ap, uint64));
+			val = tmp;
+		} else if (tupdesc[i] == 's') {
+			val = va_arg(ap, char *);
+		} else if (tupdesc[i] == 'T') {
+			usec_t time = va_arg(ap, usec_t);
+			val = format_date(time);
+		} else
+			fatal("bad tupdesc: %s", tupdesc);
+
+		if (val) {
+			/*
+			 * The value is written with its terminating NUL
+			 * and the length field counts that NUL (len + 1).
+			 */
+			len = strlen(val);
+			pktbuf_put_uint32(buf, len + 1);
+			pktbuf_put_string(buf, val);
+		} else {
+			/* NULL */
+			pktbuf_put_uint32(buf, -1);
+		}
+	}
+	va_end(ap);
+
+	pktbuf_finish_packet(buf);
+}
+
diff --git a/src/pktbuf.h b/src/pktbuf.h
new file mode 100644
index 0000000..057f119
--- /dev/null
+++ b/src/pktbuf.h
@@ -0,0 +1,136 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Safe & easy creation of PostgreSQL packets.
+ */
+
+typedef struct PktBuf PktBuf;
+struct PktBuf {
+	uint8 *buf;		/* packet data */
+	int buf_len;		/* size of buf */
+	int write_pos;		/* offset where the next byte is written */
+	int pktlen_pos;		/* offset of the current packet's length field */
+
+	int send_pos;		/* offset of the first byte not yet sent */
+	struct event *ev;	/* event used by queued sending */
+
+	unsigned failed:1;	/* a write did not fit or allocation failed */
+	unsigned sending:1;	/* set by pktbuf_send_queued() */
+	unsigned fixed_buf:1;	/* buf is caller-owned (pktbuf_static) */
+};
+
+/*
+ * pktbuf creation
+ */
+PktBuf *pktbuf_dynamic(int start_len);
+void pktbuf_static(PktBuf *buf, uint8 *data, int len);
+
+/*
+ * sending
+ */
+bool pktbuf_send_immidiate(PktBuf *buf, PgSocket *sk);
+void pktbuf_send_queued(PktBuf *buf, PgSocket *sk);
+
+/*
+ * low-level ops
+ */
+void pktbuf_start_packet(PktBuf *buf, int type);
+void pktbuf_put_char(PktBuf *buf, char val);
+void pktbuf_put_uint16(PktBuf *buf, uint16 val);
+void pktbuf_put_uint32(PktBuf *buf, uint32 val);
+void pktbuf_put_uint64(PktBuf *buf, uint64 val);
+void pktbuf_put_string(PktBuf *buf, const char *str);
+void pktbuf_put_bytes(PktBuf *buf, const void *data, int len);
+void pktbuf_finish_packet(PktBuf *buf);
+/* number of bytes written into the buffer so far */
+#define pktbuf_written(buf) ((buf)->write_pos)
+
+
+/*
+ * Packet writing
+ */
+void pktbuf_write_generic(PktBuf *buf, int type, const char *fmt, ...);
+void pktbuf_write_RowDescription(PktBuf *buf, const char *tupdesc, ...);
+void pktbuf_write_DataRow(PktBuf *buf, const char *tupdesc, ...);
+
+/*
+ * Shortcuts for actual packets.
+ */
+#define pktbuf_write_ParameterStatus(buf, key, val) \
+	pktbuf_write_generic(buf, 'S', "ss", key, val)
+
+#define pktbuf_write_AuthenticationOk(buf) \
+	pktbuf_write_generic(buf, 'R', "i", 0)
+
+#define pktbuf_write_ReadyForQuery(buf) \
+	pktbuf_write_generic(buf, 'Z', "c", 'I')
+
+#define pktbuf_write_CommandComplete(buf, desc) \
+	pktbuf_write_generic(buf, 'C', "s", desc)
+
+/* key points to 8 bytes of cancel key */
+#define pktbuf_write_BackendKeyData(buf, key) \
+	pktbuf_write_generic(buf, 'K', "b", key, 8)
+
+/* key points to 8 bytes of cancel key */
+#define pktbuf_write_CancelRequest(buf, key) \
+	pktbuf_write_generic(buf, PKT_CANCEL, "b", key, 8)
+
+/* parms is written as raw bytes, followed by "user", the user name and an empty string */
+#define pktbuf_write_StartupMessage(buf, user, parms, parms_len) \
+	pktbuf_write_generic(buf, PKT_STARTUP, "bsss", parms, parms_len, "user", user, "")
+
+#define pktbuf_write_PasswordMessage(buf, psw) \
+	pktbuf_write_generic(buf, 'p', "s", psw)
+
+/*
+ * Shortcut for creating DataRow in memory.
+ *
+ * reslen receives the number of bytes written into dst, or -1 if the
+ * row did not fit into dstlen bytes.
+ */
+
+#define BUILD_DataRow(reslen, dst, dstlen, args...) do { \
+	PktBuf _buf; \
+	pktbuf_static(&_buf, dst, dstlen); \
+	pktbuf_write_DataRow(&_buf, ## args); \
+	reslen = _buf.failed ? -1 : _buf.write_pos; \
+} while (0)
+
+/*
+ * Shortcuts for immediate send of one packet.
+ *
+ * The packet is built in a buflen-byte buffer on the stack and sent
+ * with pktbuf_send_immidiate(); res receives that call's result.
+ */
+
+#define SEND_wrap(buflen, pktfn, res, sk, args...) do { \
+	uint8 _data[buflen]; PktBuf _buf; \
+	pktbuf_static(&_buf, _data, sizeof(_data)); \
+	pktfn(&_buf, ## args); \
+	res = pktbuf_send_immidiate(&_buf, sk); \
+} while (0)
+
+#define SEND_RowDescription(res, sk, args...) \
+	SEND_wrap(512, pktbuf_write_RowDescription, res, sk, ## args)
+
+#define SEND_generic(res, sk, args...) \
+	SEND_wrap(512, pktbuf_write_generic, res, sk, ## args)
+
+#define SEND_ReadyForQuery(res, sk) \
+	SEND_wrap(8, pktbuf_write_ReadyForQuery, res, sk)
+
+#define SEND_CancelRequest(res, sk, key) \
+	SEND_wrap(16, pktbuf_write_CancelRequest, res, sk, key)
+
+#define SEND_PasswordMessage(res, sk, psw) \
+	SEND_wrap(512, pktbuf_write_PasswordMessage, res, sk, psw)
+
+
+
diff --git a/src/pooler.c b/src/pooler.c
new file mode 100644
index 0000000..2f26e49
--- /dev/null
+++ b/src/pooler.c
@@ -0,0 +1,269 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Handling of pooler listening sockets
+ */
+
+#include "bouncer.h"
+
+/* listening sockets; 0 means "not open" */
+static int fd_net = 0;
+static int fd_unix = 0;
+static struct event ev_net;
+static struct event ev_unix;
+static int suspended = 0;
+
+/* timer used to retry after accept() failure */
+static struct event ev_err;
+static struct timeval err_timeout = {5, 0};
+
+/*
+ * atexit() handler: remove the unix socket file, unless the pooler
+ * is suspended or no unix socket dir is configured.
+ */
+static void cleanup_unix_socket(void)
+{
+	char fn[256];
+	if (!cf_unix_socket_dir || suspended)
+		return;
+	snprintf(fn, sizeof(fn), "%s/.s.PGSQL.%d",
+		 cf_unix_socket_dir, cf_listen_port);
+	unlink(fn);
+}
+
+/* report the current listening fds (0 if not open) */
+void get_pooler_fds(int *p_net, int *p_unix)
+{
+	*p_net = fd_net;
+	*p_unix = fd_unix;
+}
+
+/*
+ * Create, bind and listen on <socket_dir>/.s.PGSQL.<listen_port>.
+ * Any failure is fatal, as is an existing "<path>.lock" file.
+ */
+static int create_unix_socket(const char *socket_dir, int listen_port)
+{
+	struct sockaddr_un un;
+	int res, sock;
+	char lockfile[256];
+	struct stat st;
+
+	/* fill sockaddr struct */
+	memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	snprintf(un.sun_path, sizeof(un.sun_path),
+		 "%s/.s.PGSQL.%d", socket_dir, listen_port);
+
+	/* check for lockfile */
+	snprintf(lockfile, sizeof(lockfile), "%s.lock", un.sun_path);
+	res = lstat(lockfile, &st);
+	if (res == 0)
+		fatal("unix port %d is in use", listen_port);
+
+	/* expect old bouncer gone */
+	unlink(un.sun_path);
+
+	/* create socket */
+	sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (sock < 0)
+		fatal_perror("socket");
+
+	/* bind it */
+	res = bind(sock, (const struct sockaddr *)&un, sizeof(un));
+	if (res < 0)
+		fatal_perror("bind");
+
+	/* remove socket on shutdown */
+	atexit(cleanup_unix_socket);
+
+	/* set common options */
+	tune_socket(sock, true);
+
+	/* finally, accept connections */
+	res = listen(sock, 100);
+	if (res < 0)
+		fatal_perror("listen");
+
+	res = chmod(un.sun_path, 0777);
+	if (res < 0)
+		fatal_perror("chmod");
+
+	log_info("listening on unix:%s", un.sun_path);
+
+	return sock;
+}
+
+/*
+ * Create, bind and listen on a TCP socket.  listen_addr is either "*"
+ * (any address) or a dotted IPv4 address.  Any failure is fatal.
+ */
+static int create_net_socket(const char *listen_addr, int listen_port)
+{
+	int sock;
+	struct sockaddr_in sa;
+	int res;
+	int val;
+
+	/* create socket */
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock < 0)
+		fatal_perror("socket");
+
+	/* parse address */
+	memset(&sa, 0, sizeof(sa));
+	sa.sin_family = AF_INET;
+	/* use the arguments, not the cf_listen_* globals, like create_unix_socket() */
+	sa.sin_port = htons(listen_port);
+	if (strcmp(listen_addr, "*") == 0) {
+		sa.sin_addr.s_addr = htonl(INADDR_ANY);
+	} else {
+		sa.sin_addr.s_addr = inet_addr(listen_addr);
+		if (sa.sin_addr.s_addr == INADDR_NONE)
+			fatal("cannot parse addr: '%s'", listen_addr);
+	}
+
+	/* relaxed binding */
+	val = 1;
+	res = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+	if (res < 0)
+		fatal_perror("setsockopt");
+
+	/* bind to address */
+	res = bind(sock, (struct sockaddr *)&sa, sizeof(sa));
+	if (res < 0)
+		fatal_perror("bind");
+
+	/* set common options */
+	tune_socket(sock, false);
+
+#ifdef TCP_DEFER_ACCEPT
+	/*
+	 * Notify pooler only when also data is arrived.
+	 *
+	 * optval specifies how long after connection attempt to wait for data.
+	 *
+	 * Related to tcp_synack_retries sysctl, default 5 (corresponds 180 secs).
+	 */
+	if (cf_tcp_defer_accept > 0) {
+		val = cf_tcp_defer_accept;
+		res = setsockopt(sock, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, sizeof(val));
+		if (res < 0)
+			fatal_perror("setsockopt TCP_DEFER_ACCEPT");
+	}
+#endif
+
+	/* finally, accept connections */
+	res = listen(sock, 100);
+	if (res < 0)
+		fatal_perror("listen");
+
+	log_info("listening on %s:%d", listen_addr, listen_port);
+
+	return sock;
+}
+
+/* timer callback: start accepting again after an accept() failure */
+static void err_wait_func(int sock, short flags, void *arg)
+{
+	resume_pooler();
+}
+
+/*
+ * got new connection, associate it with client struct
+ *
+ * is_unix is the event argument: non-NULL for the unix listening
+ * socket, NULL for the TCP one (see resume_pooler()).
+ */
+static void
+pool_accept(int sock, short flags, void *is_unix)
+{
+	int fd;
+	union {
+		struct sockaddr_in in;
+		struct sockaddr_un un;
+		struct sockaddr sa;
+	} addr;
+	socklen_t len = sizeof(addr);
+
+	/* get fd */
+	fd = accept(sock, &addr.sa, &len);
+	if (fd < 0) {
+		/*
+		 * probably fd limit, pointless to try often
+		 * wait a bit, hope that admin resolves somehow
+		 */
+		log_error("accept() failed: %s", strerror(errno));
+		suspend_pooler();
+		evtimer_set(&ev_err, err_wait_func, NULL);
+		evtimer_add(&ev_err, &err_timeout);
+		return;
+	}
+
+	log_noise("new fd from accept=%d", fd);
+	if (is_unix) {
+		log_debug("P: new unix client");
+		{
+			uid_t uid;
+			log_noise("getuid(): %d", (int)getuid());
+			if (get_unix_peer_uid(fd, &uid))
+				log_noise("unix peer uid: %d", (int)uid);
+			else
+				log_noise("unix peer uid failed");
+		}
+		accept_client(fd, NULL, true);
+	} else {
+		log_debug("P: new tcp client");
+		accept_client(fd, &addr.in, false);
+	}
+}
+
+/* adopt an already listening socket instead of creating a new one */
+bool
+use_pooler_socket(int sock, bool is_unix)
+{
+	tune_socket(sock, is_unix);
+
+	if (is_unix)
+		fd_unix = sock;
+	else
+		fd_net = sock;
+	return true;
+}
+
+/* stop accepting: remove the accept events, sockets stay open */
+void
+suspend_pooler(void)
+{
+	suspended = 1;
+
+	if (fd_net)
+		event_del(&ev_net);
+	if (fd_unix)
+		event_del(&ev_unix);
+}
+
+/* (re)install the accept events on the open listening sockets */
+void
+resume_pooler(void)
+{
+	suspended = 0;
+
+	if (fd_unix) {
+		/* any non-NULL argument tells pool_accept() this is the unix socket */
+		event_set(&ev_unix, fd_unix, EV_READ | EV_PERSIST, pool_accept, "1");
+		event_add(&ev_unix, NULL);
+	}
+
+	if (fd_net) {
+		event_set(&ev_net, fd_net, EV_READ | EV_PERSIST, pool_accept, NULL);
+		event_add(&ev_net, NULL);
+	}
+}
+
+/* listen on socket - should happen after all other initializations */
+void
+pooler_setup(void)
+{
+	if (cf_listen_addr && !fd_net)
+		fd_net = create_net_socket(cf_listen_addr, cf_listen_port);
+
+	if (cf_unix_socket_dir && !fd_unix)
+		fd_unix = create_unix_socket(cf_unix_socket_dir, cf_listen_port);
+
+	if (!fd_net && !fd_unix)
+		fatal("nowhere to listen on");
+
+	resume_pooler();
+}
+
diff --git a/src/pooler.h b/src/pooler.h
new file mode 100644
index 0000000..a4536ca
--- /dev/null
+++ b/src/pooler.h
@@ -0,0 +1,24 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +void pooler_setup(void); +bool use_pooler_socket(int fd, bool is_unix); +void resume_pooler(void); +void suspend_pooler(void); +void get_pooler_fds(int *p_net, int *p_unix); + diff --git a/src/proto.c b/src/proto.c new file mode 100644 index 0000000..aef48b0 --- /dev/null +++ b/src/proto.c @@ -0,0 +1,337 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Pieces that need to have detailed info about protocol. 
+ */
+
+#include "bouncer.h"
+
+/*
+ * parse protocol header from MBuf
+ */
+
+/*
+ * parses pkt header from buffer, returns false if failed
+ *
+ * A non-zero first byte is taken as the packet type and is followed
+ * by a 32-bit length; the reported length is that value plus 1, so it
+ * includes the type byte.
+ *
+ * A zero first byte starts a "special" packet: the second byte must
+ * also be zero, then a 16-bit length and a 32-bit code follow.  The
+ * code selects PKT_CANCEL, PKT_SSLREQ or PKT_STARTUP.
+ */
+bool get_header(MBuf *pkt, unsigned *pkt_type_p, unsigned *pkt_len_p)
+{
+	unsigned type;
+	unsigned len;
+	unsigned code;
+
+	if (mbuf_avail(pkt) < 5) {
+		log_noise("get_header: less then 5 bytes available");
+		return false;
+	}
+	type = mbuf_get_char(pkt);
+	if (type != 0) {
+		len = mbuf_get_uint32(pkt) + 1;
+	} else {
+		if (mbuf_get_char(pkt) != 0) {
+			log_noise("get_header: unknown special pkt");
+			return false;
+		}
+		/* dont tolerate partial pkt */
+		if (mbuf_avail(pkt) < 6) {
+			log_noise("get_header: less that 6 bytes for special pkt");
+			return false;
+		}
+		len = mbuf_get_uint16(pkt);
+		code = mbuf_get_uint32(pkt);
+		if (code == 80877102)
+			type = PKT_CANCEL;
+		else if (code == 80877103)
+			type = PKT_SSLREQ;
+		/* upper 16 bits 3 with lower 16 bits 0 or 1 */
+		else if ((code >> 16) == 3 && (code & 0xFFFF) < 2)
+			type = PKT_STARTUP;
+		else {
+			log_noise("get_header: unknown special pkt: len=%u code=%u", len, code);
+			return false;
+		}
+	}
+	*pkt_type_p = type;
+	*pkt_len_p = len;
+	return true;
+}
+
+
+/*
+ * Send error message packet to client.
+ *
+ * The error is logged first.  The 'E' packet carries severity
+ * "ERROR", code "08P01" and msg; if send_ready is set a ReadyForQuery
+ * packet is appended.  Both are sent with one immediate send, whose
+ * result is returned.
+ */
+
+bool send_pooler_error(PgSocket *client, bool send_ready, const char *msg)
+{
+	uint8 tmpbuf[512];
+	PktBuf buf;
+
+	slog_error(client, "Pooler Error: %s", msg);
+
+	pktbuf_static(&buf, tmpbuf, sizeof(tmpbuf));
+	pktbuf_write_generic(&buf, 'E', "cscscsc",
+			     'S', "ERROR", 'C', "08P01", 'M', msg, 0);
+	if (send_ready)
+		pktbuf_write_ReadyForQuery(&buf);
+	return pktbuf_send_immidiate(&buf, client);
+}
+
+/*
+ * Parse server error message and log it.
+ */
+void log_server_error(const char *note, MBuf *pkt)
+{
+	const char *level = NULL, *msg = NULL, *val;
+	int type;
+	/* fields are (type byte, C string) pairs, terminated by a zero type */
+	while (mbuf_avail(pkt)) {
+		type = mbuf_get_char(pkt);
+		if (type == 0)
+			break;
+		val = mbuf_get_string(pkt);
+		if (!val)
+			break;
+		if (type == 'S')
+			level = val;
+		else if (type == 'M')
+			msg = val;
+	}
+	if (!msg || !level)
+		log_error("%s: corrupt error message", note);
+	else
+		log_error("%s: %s: %s", note, level, msg);
+}
+
+
+/*
+ * Preparation of welcome message for client connection.
+ */
+
+/*
+ * add another server parameter packet to cache
+ *
+ * Appends a ParameterStatus packet (preceded by AuthenticationOk when
+ * the cache is still empty) to db->welcome_msg.  Returns false if the
+ * packet is incomplete or broken, or does not fit into the cache.
+ */
+bool add_welcome_parameter(PgSocket *server,
+			   unsigned pkt_type, unsigned pkt_len, MBuf *pkt)
+{
+	PgDatabase *db = server->pool->db;
+	PktBuf msg;
+	const char *key, *val;
+
+	if (db->welcome_msg_ready)
+		return true;
+
+	/* incomplete startup msg from server? */
+	if (pkt_len - 5 > mbuf_avail(pkt))
+		return false;
+
+	pktbuf_static(&msg, db->welcome_msg + db->welcome_msg_len,
+		      sizeof(db->welcome_msg) - db->welcome_msg_len);
+
+	if (db->welcome_msg_len == 0)
+		pktbuf_write_AuthenticationOk(&msg);
+
+	key = mbuf_get_string(pkt);
+	val = mbuf_get_string(pkt);
+	if (!key || !val) {
+		log_error("broken ParameterStatus packet");
+		return false;
+	}
+	log_debug("S: param: %s = %s", key, val);
+	pktbuf_write_ParameterStatus(&msg, key, val);
+
+	/*
+	 * On overflow the static buffer holds a truncated packet; do not
+	 * account it, otherwise clients would be sent garbage.
+	 */
+	if (msg.failed) {
+		log_error("welcome message does not fit into cache");
+		return false;
+	}
+	db->welcome_msg_len += pktbuf_written(&msg);
+
+	return true;
+}
+
+/* all parameters processed */
+void finish_welcome_msg(PgSocket *server)
+{
+	PgDatabase *db = server->pool->db;
+	if (db->welcome_msg_ready)
+		return;
+	db->welcome_msg_ready = 1;
+}
+
+/*
+ * Send the cached welcome message, a fresh BackendKeyData and
+ * ReadyForQuery to the client.  Returns false only if the welcome
+ * message is not ready yet; a send failure is only logged.
+ */
+bool welcome_client(PgSocket *client)
+{
+	int res;
+	uint8 buf[1024];
+	PktBuf msg;
+	PgDatabase *db = client->pool->db;
+
+	log_noise("P: welcome_client");
+	if (!db->welcome_msg_ready)
+		return false;
+
+	pktbuf_static(&msg, buf, sizeof(buf));
+	pktbuf_put_bytes(&msg, db->welcome_msg, db->welcome_msg_len);
+
+	/* give each client its own cancel key */
+	get_random_bytes(client->cancel_key, 8);
+	pktbuf_write_BackendKeyData(&msg, client->cancel_key);
+	pktbuf_write_ReadyForQuery(&msg);
+
+	/* send all together */
+	res = pktbuf_send_immidiate(&msg, client);
+	if (!res)
+		log_warning("unhandled failure to send welcome_msg");
+
+	return true;
+}
+
+/*
+ * Password authentication for server
+ */
+
+/* actual packet send */
+static void send_password(PgSocket *server, const char *enc_psw)
+{
+	bool res;
+	SEND_PasswordMessage(res, server, enc_psw);
+	if (!res)
+		disconnect_server(server, true,
+				  "partial send unhandled in send_password");
+}
+
+/* answer a cleartext password request */
+static void login_clear_psw(PgSocket *server)
+{
+	log_debug("P: send clear password");
+	send_password(server, server->pool->user->passwd);
+}
+
+/* answer a crypt() password request; salt points to 2 salt bytes */
+static void login_crypt_psw(PgSocket *server, const uint8 *salt)
+{
+	char saltbuf[3];
+	const char *enc;
+	PgUser *user = server->pool->user;
+
+	log_debug("P: send crypt password");
+	/*
+	 * The salt is 2 raw bytes without terminator.  strncpy() with
+	 * n == 2 left saltbuf[2] uninitialized, so terminate explicitly
+	 * before handing the buffer to pg_crypt() as a string.
+	 */
+	memcpy(saltbuf, salt, 2);
+	saltbuf[2] = 0;
+	enc = pg_crypt(user->passwd, saltbuf);
+	send_password(server, enc);
+}
+
+
+/*
+ * answer an md5 password request; salt points to 4 salt bytes
+ *
+ * A password that is not already in md5 form is first hashed with
+ * the user name; the result (without its first 3 chars) is then
+ * hashed with the salt.
+ */
+static void login_md5_psw(PgSocket *server, const uint8 *salt)
+{
+	char txt[MD5_PASSWD_LEN + 1], *src;
+	PgUser *user = server->pool->user;
+
+	log_debug("P: send md5 password");
+	if (!isMD5(user->passwd)) {
+		pg_md5_encrypt(user->passwd, user->name, strlen(user->name), txt);
+		src = txt + 3;
+	} else
+		src = user->passwd + 3;
+	pg_md5_encrypt(src, (char *)salt, 4, txt);
+
+	send_password(server, txt);
+}
+
+/*
+ * answer server authentication request
+ *
+ * Returns false if the packet is too short or not fully available.
+ * Unsupported and unknown methods are logged and true is returned.
+ */
+bool answer_authreq(PgSocket *server,
+		    unsigned pkt_type, unsigned pkt_len,
+		    MBuf *pkt)
+{
+	unsigned cmd;
+	const uint8 *salt;
+
+	if (pkt_len < 5 + 4)
+		return false;
+	if (mbuf_avail(pkt) < pkt_len - 5)
+		return false;
+
+	cmd = mbuf_get_uint32(pkt);
+	switch (cmd) {
+	case 0:
+		log_debug("S: auth ok");
+		break;
+	case 3:
+		log_debug("S: req cleartext password");
+		login_clear_psw(server);
+		break;
+	case 4:
+		if (pkt_len < 5 + 4 + 2)
+			return false;
+		log_debug("S: req crypt psw");
+		salt = mbuf_get_bytes(pkt, 2);
+		login_crypt_psw(server, salt);
+		break;
+	case 5:
+		if (pkt_len < 5 + 4 + 4)
+			return false;
+		log_debug("S: req md5-crypted psw");
+		salt = mbuf_get_bytes(pkt, 4);
+		login_md5_psw(server, salt);
+		break;
+	case 2: /* kerberos */
+	case 6: /* scm something */
+		log_error("unsupported auth method: %d", cmd);
+		/* was falling through and also logging "unknown auth method" */
+		break;
+	default:
+		log_error("unknown auth method: %d", cmd);
+	}
+	return true;
+}
+
+/* build and send the startup packet for the pool's user and database */
+bool send_startup_packet(PgSocket *server)
+{
+	PgDatabase *db = server->pool->db;
+	const char *username = server->pool->user->name;
+	PktBuf pkt;
+	uint8 buf[512];
+
+	pktbuf_static(&pkt, buf, sizeof(buf));
+	pktbuf_write_StartupMessage(&pkt, username,
+				    db->startup_params,
+				    db->startup_params_len);
+	return pktbuf_send_immidiate(&pkt, server);
+}
+
+/*
+ * Parse a text-format row into the pointers given as varargs.
+ *
+ * tupdesc keys: 'i' - int *, 'q' - uint64 *, 's' - char **.
+ * A NULL column (negative length) yields 0 for numeric keys and a NULL
+ * pointer for 's'.  Column count mismatch, an unknown key or trailing
+ * data is fatal.  Returns the number of columns.
+ *
+ * NOTE(review): values are parsed as C strings; this presumably relies
+ * on the sender including a terminating NUL in each value - confirm.
+ */
+int scan_text_result(MBuf *pkt, const char *tupdesc, ...)
+{
+	char *val = NULL;
+	int len;
+	unsigned ncol, i;
+	va_list ap;
+
+	ncol = mbuf_get_uint16(pkt);
+	if (ncol != strlen(tupdesc))
+		fatal("different number of cols");
+
+	va_start(ap, tupdesc);
+	for (i = 0; i < ncol; i++) {
+		len = mbuf_get_uint32(pkt);
+		if (len < 0)
+			val = NULL;
+		else
+			val = (char *)mbuf_get_bytes(pkt, len);
+
+		if (tupdesc[i] == 'i') {
+			int *dst_p = va_arg(ap, int *);
+			/* atoi(NULL) would crash on a NULL column */
+			*dst_p = val ? atoi(val) : 0;
+		} else if (tupdesc[i] == 'q') {
+			uint64 *dst_p = va_arg(ap, uint64 *);
+			/* atoll(NULL) would crash on a NULL column */
+			*dst_p = val ? atoll(val) : 0;
+		} else if (tupdesc[i] == 's') {
+			char **dst_p = va_arg(ap, char **);
+			*dst_p = val;
+		} else
+			fatal("bad tupdesc: %s", tupdesc);
+	}
+	va_end(ap);
+
+	if (mbuf_avail(pkt))
+		fatal("scan_text_result: unparsed data");
+
+	return ncol;
+}
+
diff --git a/src/proto.h b/src/proto.h
new file mode 100644
index 0000000..eeaf343
--- /dev/null
+++ b/src/proto.h
@@ -0,0 +1,33 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +bool get_header(MBuf *pkt, unsigned *pkt_type_p, unsigned *pkt_len_p); + +bool send_pooler_error(PgSocket *client, bool send_ready, const char *msg); +void log_server_error(const char *note, MBuf *pkt); + +bool add_welcome_parameter(PgSocket *server, unsigned pkt_type, unsigned pkt_len, MBuf *pkt); +void finish_welcome_msg(PgSocket *server); +bool welcome_client(PgSocket *client); + +bool answer_authreq(PgSocket *server, unsigned pkt_type, unsigned pkt_len, MBuf *pkt); + +bool send_startup_packet(PgSocket *server); + +int scan_text_result(MBuf *pkt, const char *tupdesc, ...); + diff --git a/src/sbuf.c b/src/sbuf.c new file mode 100644 index 0000000..f64c75d --- /dev/null +++ b/src/sbuf.c @@ -0,0 +1,500 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stream buffer + * + * The task is to copy data from one socket to another + * efficiently, while allowing callbacks to look + * at packet headers. + */ + +#include "bouncer.h" + +/* + * If less than this amount of data is pending, then + * prefer to merge it with the next recv(). + */ +#define SMALL_PKT 16 + +/* declare static stuff */ +static void sbuf_queue_send(SBuf *sbuf); +static bool sbuf_send_pending(SBuf *sbuf); +static bool sbuf_process_pending(SBuf *sbuf); +static void sbuf_connect_cb(int sock, short flags, void *arg); +static void sbuf_recv_cb(int sock, short flags, void *arg); +static void sbuf_send_cb(int sock, short flags, void *arg); +static void sbuf_try_resync(SBuf *sbuf); +static void sbuf_wait_for_data(SBuf *sbuf); + +/* + * Call proto callback with proper MBuf. + * + * If callback returns true it used one of sbuf_prepare_* on sbuf, + * and processing can continue. + * + * If it returned false it used sbuf_pause(), sbuf_close() or simply + * wants to wait for next event loop (eg. too little data available). + * The caller should not touch sbuf in that case and just return to libevent.
+ */ +static inline bool sbuf_call_proto(SBuf *sbuf, int event) +{ + MBuf mbuf; + uint8 *pos = sbuf->buf + sbuf->pkt_pos; + int avail = sbuf->recv_pos - sbuf->pkt_pos; + + Assert(avail >= 0); + Assert(pos + avail <= sbuf->buf + cf_sbuf_len); + Assert(event != SBUF_EV_READ || avail > 0); + + mbuf_init(&mbuf, pos, avail); + return sbuf->proto_handler(sbuf, event, &mbuf, sbuf->arg); +} + +/* lets wait for new data */ +static void sbuf_wait_for_data(SBuf *sbuf) +{ + event_set(&sbuf->ev, sbuf->sock, EV_READ | EV_PERSIST, sbuf_recv_cb, sbuf); + event_add(&sbuf->ev, NULL); +} + +/* initialize SBuf with proto handler */ +void sbuf_init(SBuf *sbuf, sbuf_proto_cb_t proto_fn, void *arg) +{ + memset(sbuf, 0, sizeof(*sbuf)); + sbuf->arg = arg; + sbuf->proto_handler = proto_fn; +} + +/* got new socket from accept() */ +void sbuf_accept(SBuf *sbuf, int sock, bool is_unix) +{ + Assert(sbuf->pkt_pos == 0); + Assert(sbuf->recv_pos == 0); + Assert(sbuf->send_pos == 0); + + tune_socket(sock, is_unix); + sbuf->sock = sock; + sbuf->is_unix = is_unix; + + if (!cf_reboot) { + sbuf_wait_for_data(sbuf); + + /* socket should already have some data (linux only) */ + if (cf_tcp_defer_accept && !is_unix) + sbuf_recv_cb(sbuf->sock, EV_READ, sbuf); + } +} + +/* need to connect() to get a socket */ +void sbuf_connect(SBuf *sbuf, const PgAddr *addr, int timeout_sec) +{ + int res, sock, domain; + struct sockaddr_in sa_in; + struct sockaddr_un sa_un; + struct sockaddr *sa; + socklen_t len; + struct timeval timeout; + + /* prepare sockaddr */ + if (addr->is_unix) { + sa = (void*)&sa_un; + len = sizeof(sa_un); + memset(sa, 0, len); + sa_un.sun_family = AF_UNIX; + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path), + "%s/.s.PGSQL.%d", cf_unix_socket_dir, addr->port); + domain = AF_UNIX; + } else { + sa = (void*)&sa_in; + len = sizeof(sa_in); + memset(sa, 0, len); + sa_in.sin_family = AF_INET; + sa_in.sin_addr = addr->ip_addr; + sa_in.sin_port = htons(addr->port); + domain = AF_INET; + } + + /* + * common 
stuff + */ + sock = socket(domain, SOCK_STREAM, 0); + if (sock < 0) { + /* probably fd limit, try to survive */ + log_error("sbuf_connect: socket() failed: %s", strerror(errno)); + sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED); + return; + } + + tune_socket(sock, addr->is_unix); + + sbuf->is_unix = addr->is_unix; + sbuf->sock = sock; + + timeout.tv_sec = timeout_sec; + timeout.tv_usec = 0; + + /* launch connection */ + res = connect(sock, sa, len); + { + /* logging may change errno; keep connect()'s value for the checks below */ + int connect_errno = errno; + + log_noise("connect(%d)=%d", sock, res); + errno = connect_errno; + } + if (res == 0) { + /* unix socket gives connection immediately */ + sbuf_connect_cb(sock, EV_WRITE, sbuf); + } else if (res < 0 && errno == EINPROGRESS) { + /* tcp socket needs waiting, sbuf_connect_cb() also gets the timeout */ + event_set(&sbuf->ev, sock, EV_WRITE, sbuf_connect_cb, sbuf); + event_add(&sbuf->ev, &timeout); + } else { + /* failure */ + log_warning("connect failed: res=%d/err=%s", res, strerror(errno)); + close(sock); + sbuf->sock = 0; + sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED); + } +} + +/* don't wait for data on this socket */ +void sbuf_pause(SBuf *sbuf) +{ + Assert(sbuf->wait_send == 0); + + event_del(&sbuf->ev); +} + +/* resume from pause, start waiting for data */ +void sbuf_continue(SBuf *sbuf) +{ + sbuf_wait_for_data(sbuf); + + /* there is some data already received */ + sbuf_recv_cb(sbuf->sock, EV_READ, sbuf); +} + +/* + * Resume from pause and give socket over to external + * callback function. + * + * The callback will be called with arg given to sbuf_init.
+ */ +void sbuf_continue_with_callback(SBuf *sbuf, sbuf_libevent_cb user_cb) +{ + event_set(&sbuf->ev, sbuf->sock, EV_READ | EV_PERSIST, + user_cb, sbuf->arg); + event_add(&sbuf->ev, NULL); +} + +/* + * Socket cleanup & close. + * + * Removes the event and closes the fd when one is set (sock > 0), + * then resets dst, all buffer positions and all flags. + */ +void sbuf_close(SBuf *sbuf) +{ + /* keep handler & arg values */ + if (sbuf->sock > 0) { + event_del(&sbuf->ev); + safe_close(sbuf->sock); + } + sbuf->dst = NULL; + sbuf->sock = 0; + sbuf->pkt_pos = sbuf->pkt_remain = sbuf->recv_pos = 0; + sbuf->pkt_skip = sbuf->wait_send = sbuf->pkt_flush = 0; + sbuf->send_pos = sbuf->send_remain = 0; +} + +/* + * proto_fn asks to forward the next 'amount' bytes to dst; + * 'flush' is remembered in pkt_flush. + */ +void sbuf_prepare_send(SBuf *sbuf, SBuf *dst, unsigned amount, bool flush) +{ + Assert(sbuf->pkt_remain == 0); + Assert(sbuf->pkt_skip == 0 || sbuf->send_remain == 0); + Assert(!sbuf->pkt_flush || sbuf->send_remain == 0); + Assert(amount > 0); + + sbuf->pkt_skip = 0; + sbuf->pkt_remain = amount; + sbuf->pkt_flush = flush; + sbuf->dst = dst; +} + +/* proto_fn asks to skip the next 'amount' bytes; no destination is set */ +void sbuf_prepare_skip(SBuf *sbuf, int amount) +{ + Assert(sbuf->pkt_remain == 0); + Assert(sbuf->pkt_skip == 0 || sbuf->send_remain == 0); + Assert(!sbuf->pkt_flush || sbuf->send_remain == 0); + Assert(amount > 0); + + sbuf->pkt_skip = 1; + sbuf->pkt_remain = amount; + sbuf->pkt_flush = 0; + sbuf->dst = NULL; +} + +/* libevent EV_WRITE: called when dest socket is writable again */ +static void sbuf_send_cb(int sock, short flags, void *arg) +{ + bool res; + SBuf *sbuf = arg; + + sbuf->wait_send = 0; + res = sbuf_process_pending(sbuf); + /* everything went out: go back to waiting for input */ + if (res) + sbuf_wait_for_data(sbuf); +} + +/* + * Dest socket is full, wait until it's writable again. + * + * Replaces the sbuf's event with an EV_WRITE event on the dest socket. + */ +static void sbuf_queue_send(SBuf *sbuf) +{ + sbuf->wait_send = 1; + event_del(&sbuf->ev); + event_set(&sbuf->ev, sbuf->dst->sock, EV_WRITE, sbuf_send_cb, sbuf); + event_add(&sbuf->ev, NULL); +} + +/* + * There is data in the buffer to be sent. Returns true if processing can continue.
+ * + * Does not look at pkt_pos/remain fields, expects them to be merged to send_* + */ +static bool sbuf_send_pending(SBuf *sbuf) +{ + int res, avail; + uint8 *pos; + +try_more: + /* how much data is available for sending */ + avail = sbuf->recv_pos - sbuf->send_pos; + if (avail > sbuf->send_remain) + avail = sbuf->send_remain; + if (avail == 0) + return true; + + /* actually send it */ + pos = sbuf->buf + sbuf->send_pos; + res = safe_send(sbuf->dst->sock, pos, avail, 0); + if (res >= 0) { + sbuf->send_remain -= res; + sbuf->send_pos += res; + + if (res < avail) { + /* + * Should do sbuf_queue_send() immediately? + * + * To be sure, let's run into EAGAIN. + */ + goto try_more; + } + return true; + } else if (errno == EAGAIN) { + /* dest socket is full: wait for EV_WRITE, sbuf_send_cb() resumes */ + sbuf_queue_send(sbuf); + return false; + } else { + /* any other send error is reported to the proto handler */ + sbuf_call_proto(sbuf, SBUF_EV_SEND_FAILED); + return false; + } +} + +/* + * Process as much data as possible. + * + * Returns false as soon as the proto handler or sbuf_send_pending() + * returns false, otherwise the result of the final sbuf_send_pending(). + */ +static bool sbuf_process_pending(SBuf *sbuf) +{ + int avail; + bool full = sbuf->recv_pos == cf_sbuf_len; + bool res; + + while (1) { + Assert(sbuf->recv_pos >= sbuf->pkt_pos); + + /* + * Enough for now? + * + * The (avail <= SMALL_PKT) check is to avoid partial pkts. + * As SBuf should not assume knowledge about packets, + * the check is not done in !full case. Packet handler can + * then still notify about partial packet by returning false.
+ */ + avail = sbuf->recv_pos - sbuf->pkt_pos; + if (avail == 0 || (full && avail <= SMALL_PKT)) + break; + + /* handle proto if start of packet */ + if (sbuf->pkt_remain == 0) { /* start of new block */ + res = sbuf_call_proto(sbuf, SBUF_EV_READ); + if (!res) + return false; + Assert(sbuf->pkt_remain > 0); + } + + /* walk pkt, merge sends */ + if (avail > sbuf->pkt_remain) + avail = sbuf->pkt_remain; + if (!sbuf->pkt_skip) { + if (sbuf->send_remain == 0) + sbuf->send_pos = sbuf->pkt_pos; + sbuf->send_remain += avail; + } + sbuf->pkt_remain -= avail; + sbuf->pkt_pos += avail; + + /* send data */ + if (sbuf->pkt_skip || sbuf->pkt_flush) { + res = sbuf_send_pending(sbuf); + if (!res) + return false; + } + } + + return sbuf_send_pending(sbuf); +} + +/* reposition at buffer start again */ +static void sbuf_try_resync(SBuf *sbuf) +{ + int avail; + + if (sbuf->pkt_pos == 0) + return; + + if (sbuf->send_remain > 0) + avail = sbuf->recv_pos - sbuf->send_pos; + else + avail = sbuf->recv_pos - sbuf->pkt_pos; + + if (avail == 0) { + sbuf->recv_pos = sbuf->pkt_pos = sbuf->send_pos = 0; + } else if (avail <= SMALL_PKT) { + if (sbuf->send_remain > 0) { + memmove(sbuf->buf, sbuf->buf + sbuf->send_pos, avail); + sbuf->pkt_pos -= sbuf->send_pos; + sbuf->send_pos = 0; + sbuf->recv_pos = avail; + } else { + memmove(sbuf->buf, sbuf->buf + sbuf->pkt_pos, avail); + sbuf->send_pos = 0; + sbuf->pkt_pos = 0; + sbuf->recv_pos = avail; + } + } +} + +/* actually ask kernel for more data */ +static bool sbuf_actual_recv(SBuf *sbuf, int len) +{ + int got; + uint8 *pos; + + pos = sbuf->buf + sbuf->recv_pos; + got = safe_recv(sbuf->sock, pos, len, 0); + + if (got == 0) { + /* eof from socket */ + sbuf_call_proto(sbuf, SBUF_EV_RECV_FAILED); + return false; + } else if (got < 0) { + if (errno == EAGAIN) { + /* we tried too much, socket is empty. 
+ act as if zero bytes were read */ + got = 0; + } else { + /* some error occurred */ + sbuf_call_proto(sbuf, SBUF_EV_RECV_FAILED); + return false; + } + } + sbuf->recv_pos += got; + return true; +} + +/* callback for libevent EV_READ */ +static void sbuf_recv_cb(int sock, short flags, void *arg) +{ + int free, ok; + SBuf *sbuf = arg; + + /* reading should be disabled when waiting */ + Assert(sbuf->wait_send == 0); + +try_more: + /* make room in buffer */ + sbuf_try_resync(sbuf); + + /* + * FIXME: When called from sbuf_continue(), there is already + * data waiting. Thus there will be an unnecessary recv(). + */ + free = cf_sbuf_len - sbuf->recv_pos; + if (free > SMALL_PKT) { + ok = sbuf_actual_recv(sbuf, free); + /* recv failed or EOF: proto handler was already notified */ + if (!ok) + return; + } + + /* now handle it */ + ok = sbuf_process_pending(sbuf); + + /* if the buffer is full, there can be more data available */ + if (ok && sbuf->recv_pos == cf_sbuf_len) + goto try_more; +} + +/* + * Check if there is any error pending on socket. + * + * Returns false (after logging) if getsockopt() fails or SO_ERROR is set. + */ +static bool sbuf_after_connect_check(SBuf *sbuf) +{ + int optval = 0, err; + socklen_t optlen = sizeof(optval); + + err = getsockopt(sbuf->sock, SOL_SOCKET, SO_ERROR, (void*)&optval, &optlen); + if (err < 0) { + log_error("sbuf_after_connect_check: getsockopt: %s", + strerror(errno)); + return false; + } + if (optval != 0) { + log_error("sbuf_after_connect_check: pending error: %s", + strerror(optval)); + return false; + } + return true; +} + +/* + * Callback for libevent EV_WRITE when connecting. + * + * Reports SBUF_EV_CONNECT_OK or SBUF_EV_CONNECT_FAILED to the proto + * handler; starts reading only if the handler accepted the connection. + */ +static void sbuf_connect_cb(int sock, short flags, void *arg) +{ + SBuf *sbuf = arg; + + if (flags & EV_WRITE) { + if (sbuf_after_connect_check(sbuf)) { + if (sbuf_call_proto(sbuf, SBUF_EV_CONNECT_OK)) + sbuf_wait_for_data(sbuf); + } else + sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED); + } else { + /* EV_TIMEOUT */ + sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED); + } +} + +/* + * Send some data directly to the sbuf's own socket. + * + * Returns true only if all len bytes went out in a single send. + */ +bool sbuf_answer(SBuf *sbuf, const void *buf, int len) +{ + int res; + if (sbuf->sock <= 0) +
return false; + res = safe_send(sbuf->sock, buf, len, 0); + if (res < 0) + log_error("sbuf_answer: error sending: %s", strerror(errno)); + else if (res != len) + log_error("sbuf_answer: partial send: len=%d sent=%d", len, res); + return res == len; +} + diff --git a/src/sbuf.h b/src/sbuf.h new file mode 100644 index 0000000..56d553b --- /dev/null +++ b/src/sbuf.h @@ -0,0 +1,91 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +typedef enum { + SBUF_EV_READ, + SBUF_EV_RECV_FAILED, + SBUF_EV_SEND_FAILED, + SBUF_EV_CONNECT_FAILED, + SBUF_EV_CONNECT_OK +} SBufEvent; + +typedef struct SBuf SBuf; + +/* callback should return true if it used one of sbuf_prepare_* on sbuf, + false if it used sbuf_pause(), sbuf_close() or simply wants to wait for + next event loop (eg. too few data available). 
*/ +typedef bool (*sbuf_proto_cb_t)(SBuf *sbuf, + SBufEvent evtype, + MBuf *mbuf, + void *arg); + +/* for some reason, libevent has no typedef for callback */ +typedef void (*sbuf_libevent_cb)(int, short, void *); + +struct SBuf { + /* libevent handle */ + struct event ev; + + /* protocol callback function */ + sbuf_proto_cb_t proto_handler; + void *arg; + + /* fd for this socket */ + int sock; + + /* dest SBuf for current packet */ + SBuf *dst; + + unsigned recv_pos; + unsigned pkt_pos; + unsigned pkt_remain; + unsigned send_pos; + unsigned send_remain; + + unsigned wait_send:1; + unsigned pkt_skip:1; + unsigned pkt_flush:1; + unsigned is_unix:1; + + uint8 buf[0]; +}; + +#define sbuf_socket(sbuf) ((sbuf)->sock) + +void sbuf_init(SBuf *sbuf, sbuf_proto_cb_t proto_fn, void *arg); +void sbuf_accept(SBuf *sbuf, int read_sock, bool is_unix); +void sbuf_connect(SBuf *sbuf, const PgAddr *addr, int timeout_sec); + +void sbuf_pause(SBuf *sbuf); +void sbuf_continue(SBuf *sbuf); +void sbuf_close(SBuf *sbuf); + +/* proto_fn can use those functions to order behaviour */ +void sbuf_prepare_send(SBuf *sbuf, SBuf *dst, unsigned amount, bool flush); +void sbuf_prepare_skip(SBuf *sbuf, int amount); + +bool sbuf_answer(SBuf *sbuf, const void *buf, int len); + +void sbuf_continue_with_callback(SBuf *sbuf, sbuf_libevent_cb cb); + +static inline bool sbuf_empty(SBuf *sbuf) +{ + return sbuf->send_pos == sbuf->recv_pos + && sbuf->pkt_remain == 0; +} + diff --git a/src/server.c b/src/server.c new file mode 100644 index 0000000..c89341e --- /dev/null +++ b/src/server.c @@ -0,0 +1,277 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Handling of server connections + */ + +#include "bouncer.h" + +/* process packets on server auth phase */ +static bool handle_server_startup(PgSocket *server, MBuf *pkt) +{ + unsigned pkt_type; + unsigned pkt_len; + SBuf *sbuf = &server->sbuf; + bool res = false; + + if (!get_header(pkt, &pkt_type, &pkt_len)) { + disconnect_server(server, true, "bad pkt in login phase"); + return false; + } + + if (pkt_len > mbuf_avail(pkt) + 5) { + disconnect_server(server, true, "partial pkt in login phase"); + return false; + } + + log_noise("S: pkt '%c', len=%d", pkt_type, pkt_len); + + switch (pkt_type) { + default: + slog_error(server, "unknown pkt from server: '%c'", pkt_type); + disconnect_server(server, true, "unknown pkt from server"); + break; + case 'E': /* ErrorResponse */ + log_server_error("S: login failed", pkt); + disconnect_server(server, true, "login failed"); + break; + + /* packets that need closer look */ + case 'R': /* AuthenticationXXX */ + log_debug("calling login_answer"); + res = answer_authreq(server, pkt_type, pkt_len, pkt); + break; + case 'S': /* ParameterStatus */ + res = add_welcome_parameter(server, pkt_type, pkt_len, pkt); + break; + case 'Z': /* ReadyForQuery */ + /* login ok */ + log_debug("server login ok, start accepting queries"); + server->ready = 1; + + finish_welcome_msg(server); + release_server(server); + + /* let the takeover process handle it */ + if (server->pool->admin) + takeover_login(server); + res = true; + 
break; + + /* ignorable packets */ + case 'K': /* BackendKeyData */ + /* remember the cancel key when all 8 bytes are available */ + if (mbuf_avail(pkt) >= 8) + memcpy(server->cancel_key, mbuf_get_bytes(pkt, 8), 8); + res = true; + break; + case 'N': /* NoticeResponse */ + slog_noise(server, "skipping pkt: %c", pkt_type); + res = true; + break; + } + + /* packet was handled: skip over it in the buffer */ + if (res) + sbuf_prepare_skip(sbuf, pkt_len); + + return res; +} + +/* + * Process packets on logged in connection. + * + * Depending on packet type the packet is forwarded to the linked client + * (or skipped when no client is linked), with or without flush. + * Returns false when processing must stop: bad header, unknown packet, + * partial ReadyForQuery or a disconnected server. + */ +static bool handle_server_work(PgSocket *server, MBuf *pkt) +{ + unsigned pkt_type; + unsigned pkt_len; + bool flush = 0; + bool ready = 0; + char state; + SBuf *sbuf = &server->sbuf; + PgSocket *client = server->link; + + Assert(!server->pool->admin); + + if (!get_header(pkt, &pkt_type, &pkt_len)) { + disconnect_server(server, true, "bad pkt header"); + return false; + } + slog_noise(server, "pkt='%c' len=%d", pkt_type, pkt_len); + + switch (pkt_type) { + default: + slog_error(server, "unknown pkt: '%c'", pkt_type); + disconnect_server(server, true, "unknown pkt"); + return false; + + /* pooling decisions will be based on this packet */ + case 'Z': /* ReadyForQuery */ + + /* if partial pkt, wait */ + if (mbuf_avail(pkt) == 0) + return false; + state = mbuf_get_char(pkt); + + /* set ready only if no tx */ + if (state == 'I') + ready = 1; + else if (cf_pool_mode == POOL_STMT) { + disconnect_server(server, true, + "Long transactions not allowed"); + return false; + } + /* fall through */ + + case 'E': /* ErrorResponse */ + case 'N': /* NoticeResponse */ + + /* above packets need to be sent immediately */ + flush = 1; + /* fall through */ + + /* + * chat packets, but server (and thus pooler) + * is allowed to buffer them until Sync or Flush + * is sent by client.
+ */ + case '2': /* BindComplete */ + case '3': /* CloseComplete */ + case 'c': /* CopyDone(F/B) */ + case 'f': /* CopyFail(F/B) */ + case 'I': /* EmptyQueryResponse == CommandComplete */ + case 'V': /* FunctionCallResponse */ + case 'n': /* NoData */ + case 'G': /* CopyInResponse */ + case 'H': /* CopyOutResponse */ + case '1': /* ParseComplete */ + case 'A': /* NotificationResponse */ + case 's': /* PortalSuspended */ + case 'C': /* CommandComplete */ + + /* check if client wanted immediate response */ + if (client && client->flush_req) { + flush = 1; + client->flush_req = 0; + } + /* fall through */ + + /* data packets, there will be more coming */ + case 'd': /* CopyData(F/B) */ + case 'D': /* DataRow */ + case 't': /* ParameterDescription */ + case 'S': /* ParameterStatus */ + case 'T': /* RowDescription */ + + /* forward to linked client, otherwise drop the packet */ + if (client) { + sbuf_prepare_send(sbuf, &client->sbuf, pkt_len, flush); + } else { + if (server->state != SV_TESTED) + log_warning("got packet '%c' from server" + " when not linked", pkt_type); + sbuf_prepare_skip(sbuf, pkt_len); + } + break; + } + server->ready = ready; + + /* update stats */ + server->pool->stats.server_bytes += pkt_len; + if (server->ready && client) { + usec_t total; + Assert(client->query_start != 0); + + /* query time is counted from client->query_start until server is ready */ + total = get_time_usec() - client->query_start; + client->query_start = 0; + server->pool->stats.query_time += total; + slog_debug(client, "query time: %d us", (int)total); + } + + /* session pooling keeps the server, unless it was only being tested */ + if (ready && ( cf_pool_mode != POOL_SESSION + || server->state == SV_TESTED)) + release_server(server); + + return true; +} + +/* + * Got connection, decide what to do. + * + * Returns true only if the startup packet was sent; when the connection + * is used for a pending cancel request it is disconnected afterwards + * and false is returned. + */ +static bool handle_connect(PgSocket *server) +{ + bool res = false; + PgPool *pool = server->pool; + + if (!statlist_empty(&pool->cancel_req_list)) { + slog_debug(server, "use it for pending cancel req"); + /* if pending cancel req, send it */ + forward_cancel_request(server); + /* notify disconnect_server() that connect did not fail */ + server->ready = 1; + disconnect_server(server, false, "sent cancel
req"); + } else { + /* proceed with login */ + res = send_startup_packet(server); + if (!res) + disconnect_server(server, false, "startup pkt failed"); + } + return res; +} + +/* callback from SBuf */ +bool server_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg) +{ + bool res = false; + PgSocket *server = arg; + + Assert(is_server_socket(server)); + Assert(server->state != SV_FREE); + + switch (evtype) { + case SBUF_EV_RECV_FAILED: + disconnect_server(server, false, "server conn crashed?"); + break; + case SBUF_EV_SEND_FAILED: + disconnect_client(server->link, false, "unexpected eof"); + break; + case SBUF_EV_READ: + if (mbuf_avail(pkt) < 5) { + log_noise("S: got partial header, trying to wait a bit"); + return false; + } + + server->request_time = get_cached_time(); + switch (server->state) { + case SV_LOGIN: + res = handle_server_startup(server, pkt); + break; + case SV_TESTED: + case SV_USED: + case SV_ACTIVE: + case SV_IDLE: + res = handle_server_work(server, pkt); + break; + default: + fatal("server_proto: server in bad state: %d", server->state); + } + break; + case SBUF_EV_CONNECT_FAILED: + Assert(server->state == SV_LOGIN); + disconnect_server(server, false, "connect failed"); + break; + case SBUF_EV_CONNECT_OK: + log_debug("S: connect ok"); + Assert(server->state == SV_LOGIN); + server->request_time = get_cached_time(); + res = handle_connect(server); + } + return res; +} + diff --git a/src/server.h b/src/server.h new file mode 100644 index 0000000..f99aa40 --- /dev/null +++ b/src/server.h @@ -0,0 +1,20 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +bool server_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg); + diff --git a/src/stats.c b/src/stats.c new file mode 100644 index 0000000..c891ff2 --- /dev/null +++ b/src/stats.c @@ -0,0 +1,167 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "bouncer.h" + +static struct event ev_stats; +static usec_t old_stamp, new_stamp; + +static void reset_stats(PgStats *stat) +{ + stat->server_bytes = 0; + stat->client_bytes = 0; + stat->request_count = 0; + stat->query_time = 0; +} + +static void stat_add(PgStats *total, PgStats *stat) +{ + total->server_bytes += stat->server_bytes; + total->client_bytes += stat->client_bytes; + total->request_count += stat->request_count; + total->query_time += stat->query_time; +} + +static void calc_average(PgStats *avg, PgStats *cur, PgStats *old) +{ + uint64 qcount; + usec_t dur = get_cached_time() - old_stamp; + + reset_stats(avg); + + if (dur <= 0) + return; + + avg->request_count = USEC * (cur->request_count - old->request_count) / dur; + avg->client_bytes = USEC * (cur->client_bytes - old->client_bytes) / dur; + avg->server_bytes = USEC * (cur->server_bytes - old->server_bytes) / dur; + qcount = cur->request_count - old->request_count; + if (qcount > 0) + avg->query_time = (cur->query_time - old->query_time) / qcount; +} + +static void write_stats(PktBuf *buf, PgStats *stat, PgStats *old, char *dbname) +{ + PgStats avg; + calc_average(&avg, stat, old); + pktbuf_write_DataRow(buf, "sqqqqqqqq", dbname, + stat->request_count, stat->client_bytes, + stat->server_bytes, stat->query_time, + avg.request_count, avg.client_bytes, + avg.server_bytes, avg.query_time); +} + +bool admin_database_stats(PgSocket *client, StatList *pool_list) +{ + PgPool *pool; + List *item; + PgDatabase *cur_db = NULL; + PgStats st_total, st_db, old_db, old_total; + int rows = 0; + PktBuf *buf; + + reset_stats(&st_total); + reset_stats(&st_db); + reset_stats(&old_db); + reset_stats(&old_total); + + buf = pktbuf_dynamic(512); + if (!buf) { + admin_error(client, "no mem"); + return true; + } + + pktbuf_write_RowDescription(buf, "sqqqqqqqq", "database", + "total_requests", "total_received", + "total_sent", "total_query_time", + "avg_req", "avg_recv", "avg_sent", + "avg_query"); + 
statlist_for_each(item, pool_list) { + pool = container_of(item, PgPool, head); + + if (!cur_db) + cur_db = pool->db; + + if (pool->db != cur_db) { + write_stats(buf, &st_db, &old_db, cur_db->name); + + rows ++; + cur_db = pool->db; + stat_add(&st_total, &st_db); + stat_add(&old_total, &old_db); + reset_stats(&st_db); + reset_stats(&old_db); + } + + stat_add(&st_db, &pool->stats); + stat_add(&old_db, &pool->older_stats); + } + if (cur_db) { + write_stats(buf, &st_db, &old_db, cur_db->name); + stat_add(&st_total, &st_db); + stat_add(&old_total, &old_db); + rows ++; + } + admin_flush(client, buf, "SHOW"); + + return true; +} + +/* + * Periodic timer callback (armed in stats_setup()). + * + * Rotates the time stamps and the per-pool snapshots + * (older_stats <- newer_stats <- stats), re-arms the timer for + * cf_stats_period seconds and logs the averages over all pools. + */ +static void refresh_stats(int s, short flags, void *arg) +{ + List *item; + PgPool *pool; + struct timeval period = { cf_stats_period, 0 }; + PgStats old_total, cur_total, avg; + + reset_stats(&old_total); + reset_stats(&cur_total); + + old_stamp = new_stamp; + new_stamp = get_cached_time(); + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + pool->older_stats = pool->newer_stats; + pool->newer_stats = pool->stats; + + stat_add(&cur_total, &pool->stats); + stat_add(&old_total, &pool->older_stats); + } + evtimer_add(&ev_stats, &period); + + calc_average(&avg, &cur_total, &old_total); + /* + * send totals to logfile + * + * The casts make the arguments match %llu whatever 64-bit type + * the PgStats fields are declared with. + */ + log_info("Stats: %llu req/s, in %llu b/s, " + "out %llu b/s, query %llu us", + (unsigned long long)avg.request_count, + (unsigned long long)avg.client_bytes, + (unsigned long long)avg.server_bytes, + (unsigned long long)avg.query_time); +} + +/* + * Initialize the time stamps (old one is USEC microseconds before + * the new one) and arm the periodic stats timer. + */ +void stats_setup(void) +{ + struct timeval period = { cf_stats_period, 0 }; + + new_stamp = get_time_usec(); + old_stamp = new_stamp - USEC; + + /* launch maintenance */ + evtimer_set(&ev_stats, refresh_stats, NULL); + evtimer_add(&ev_stats, &period); +} + diff --git a/src/stats.h b/src/stats.h new file mode 100644 index 0000000..cfaeb0a --- /dev/null +++ b/src/stats.h @@ -0,0 +1,22 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +void stats_setup(void); + +bool admin_database_stats(PgSocket *client, StatList *pool_list); + diff --git a/src/system.h b/src/system.h new file mode 100644 index 0000000..8a813ba --- /dev/null +++ b/src/system.h @@ -0,0 +1,83 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * Required system headers + */ + +#ifdef HAVE_CONFIG_H +#include "../config.h" +#endif + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_INTTYPES_H +#include +#endif +#ifdef HAVE_STDINT_H +#include +#endif +#ifdef HAVE_CRYPT_H +#include +#endif + +#ifdef CASSERT +#define Assert(e) do { if (!(e)) fatal("Assert(%s) failed", #e); } while (0) +#else +#define Assert(e) +#endif + +#ifndef OPEN_MAX +#define OPEN_MAX sysconf(_SC_OPEN_MAX) +#endif + +/* how many microseconds in a second */ +#define USEC (1000000LL) + +typedef enum { false=0, true=1 } bool; + +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; + + +#define INT8OID 20 +#define INT4OID 23 +#define TEXTOID 25 + diff --git a/src/takeover.c b/src/takeover.c new file mode 100644 index 0000000..94bb1b3 --- /dev/null +++ b/src/takeover.c @@ -0,0 +1,284 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * Connect to running bouncer process, load fds from it, shut it down + * and continue with them. + * + * Each row from SHOW FDS will have corresponging fd in ancillary message. + * + * Manpages: unix, sendmsg, recvmsg, cmsg, readv + */ + +#include "bouncer.h" + +/* + * Takeover done, old process shut down, + * kick this one running. + */ +static void takeover_finish(PgSocket *bouncer) +{ + disconnect_server(bouncer, false, "disko over"); + cf_reboot = 0; + resume_all(); +} + +/* parse msg for fd and info */ +static bool takeover_load_fd(MBuf *pkt, const struct cmsghdr *cmsg) +{ + int fd; + char *task, *s_addr, *user, *db; + int oldfd, port, linkfd; + uint64 ckey; + PgAddr addr; + + memset(&addr, 0, sizeof(addr)); + + if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_RIGHTS + && cmsg->cmsg_len >= CMSG_LEN(sizeof(int))) + { + /* get the fd */ + memcpy(&fd, CMSG_DATA(cmsg), sizeof(int)); + log_debug("got fd: %d", fd); + } else + fatal("broken fd packet"); + + /* parse row contents */ + scan_text_result(pkt, "issssiqi", &oldfd, &task, &user, &db, + &s_addr, &port, &ckey, &linkfd); + if (task == NULL || s_addr == NULL) + fatal("NULL data from old process"); + + log_debug("FD row: fd=%d(%d) linkfd=%d task=%s user=%s db=%s", + oldfd, fd, linkfd, task, + user ? user : "NULL", + db ? db : "NULL"); + + /* fill address */ + addr.is_unix = strcmp(s_addr, "unix") == 0 ? 
true : false; + if (addr.is_unix) { + addr.port = cf_listen_port; + } else { + addr.ip_addr.s_addr = inet_addr(s_addr); + addr.port = port; + } + + /* decide what to do with it */ + if (strcmp(task, "client") == 0) + use_client_socket(fd, &addr, db, user, ckey, oldfd, linkfd); + else if (strcmp(task, "server") == 0) + use_server_socket(fd, &addr, db, user, ckey, oldfd, linkfd); + else if (strcmp(task, "pooler") == 0) + use_pooler_socket(fd, addr.is_unix); + else + fatal("unknown task: %s", task); + + return true; +} + +static void takeover_create_link(PgPool *pool, PgSocket *client) +{ + List *item; + PgSocket *server; + + statlist_for_each(item, &pool->active_server_list) { + server = container_of(item, PgSocket, head); + if (server->tmp_sk_oldfd == client->tmp_sk_linkfd) { + server->link = client; + client->link = server; + return; + } + } + fatal("takeover_create_link: failed to find pair"); +} + +/* clean the inappropriate places the old fds got stored in */ +static void takeover_clean_socket_list(StatList *list) +{ + List *item; + PgSocket *sk; + statlist_for_each(item, list) { + sk = container_of(item, PgSocket, head); + if (sk->suspended) { + sk->tmp_sk_oldfd = get_cached_time(); + sk->tmp_sk_linkfd = get_cached_time(); + } + } +} + +/* all fds loaded, create links */ +static void takeover_postprocess_fds(void) +{ + List *item, *item2; + PgSocket *client; + PgPool *pool; + + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + if (pool->admin) + continue; + statlist_for_each(item2, &pool->active_client_list) { + client = container_of(item2, PgSocket, head); + if (client->suspended && client->tmp_sk_linkfd) + takeover_create_link(pool, client); + } + } + statlist_for_each(item, &pool_list) { + pool = container_of(item, PgPool, head); + takeover_clean_socket_list(&pool->active_client_list); + takeover_clean_socket_list(&pool->active_server_list); + takeover_clean_socket_list(&pool->idle_server_list); + } +} + +static void 
next_command(PgSocket *bouncer, MBuf *pkt) +{ + bool res = true; + const char *cmd = mbuf_get_string(pkt); + + log_debug("takeover_recv_fds: 'C' body: %s", cmd); + if (strcmp(cmd, "SUSPEND") == 0) { + log_info("SUSPEND finished, sending SHOW FDS"); + SEND_generic(res, bouncer, 'Q', "s", "SHOW FDS;"); + } else if (strncmp(cmd, "SHOW", 4) == 0) { + + log_info("SHOW FDS finished, sending SHUTDOWN"); + + /* all fds loaded, review them */ + takeover_postprocess_fds(); + + /* all OK, kill old one */ + SEND_generic(res, bouncer, 'Q', "s", "SHUTDOWN;"); + } else + fatal("got bad CMD from old bouncer: %s", cmd); + + if (!res) + fatal("command send failed"); +} + +static void takeover_parse_data(PgSocket *bouncer, + struct msghdr *msg, MBuf *data) +{ + struct cmsghdr *cmsg; + unsigned pkt_type, pkt_len; + uint8 *pktptr; + MBuf pkt; + + cmsg = msg->msg_controllen ? CMSG_FIRSTHDR(msg) : NULL; + + while (mbuf_avail(data) > 0) { + if (!get_header(data, &pkt_type, &pkt_len)) + fatal("cannot parse packet"); + + pktptr = (uint8*)mbuf_get_bytes(data, pkt_len - 5); + mbuf_init(&pkt, pktptr, pkt_len - 5); + + switch (pkt_type) { + case 'T': /* RowDescription */ + log_debug("takeover_parse_data: 'T'"); + break; + case 'D': /* DataRow */ + log_debug("takeover_parse_data: 'D'"); + if (cmsg) { + takeover_load_fd(&pkt, cmsg); + cmsg = CMSG_NXTHDR(msg, cmsg); + } else + fatal("got row without fd info"); + break; + case 'Z': /* ReadyForQuery */ + log_debug("takeover_parse_data: 'Z'"); + break; + case 'C': /* CommandComplete */ + log_debug("takeover_parse_data: 'C'"); + next_command(bouncer, &pkt); + break; + case 'E': /* ErrorMessage */ + log_server_error("old bouncer sent", &pkt); + fatal("something failed"); + default: + fatal("takeover_parse_data: unexpected pkt: '%c'", pkt_type); + } + } +} + +/* + * listen for data from old bouncer. 
+ * + * use always sendmsg, to keep code simpler + */ +static void takeover_recv_cb(int sock, short flags, void *arg) +{ + PgSocket *bouncer = arg; + uint8 data_buf[2048]; + uint8 cnt_buf[128]; + struct msghdr msg; + struct iovec io; + int res; + MBuf data; + + memset(&msg, 0, sizeof(msg)); + io.iov_base = data_buf; + io.iov_len = sizeof(data_buf); + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = cnt_buf; + msg.msg_controllen = sizeof(cnt_buf); + + res = safe_recvmsg(sock, &msg, 0); + if (res > 0) { + mbuf_init(&data, data_buf, res); + takeover_parse_data(bouncer, &msg, &data); + } else if (res == 0) { + takeover_finish(bouncer); + } else { + if (errno == EAGAIN) + return; + fatal_perror("safe_recvmsg"); + } +} + +/* + * login finished, send first command, + * replace recv callback with custom recvmsg() based one. + */ +void takeover_login(PgSocket *bouncer) +{ + bool res; + + slog_info(bouncer, "Login OK, sending SUSPEND"); + SEND_generic(res, bouncer, 'Q', "s", "SUSPEND;"); + + /* use own callback */ + sbuf_pause(&bouncer->sbuf); + sbuf_continue_with_callback(&bouncer->sbuf, takeover_recv_cb); +} + +/* launch connection to running process */ +void takeover_init(void) +{ + PgDatabase *db = find_database("pgbouncer"); + PgPool *pool = get_pool(db, db->forced_user); + + if (!pool) + fatal("no admin pool?"); + + log_info("takeover_init: launching connection"); + launch_new_connection(pool); +} + diff --git a/src/takeover.h b/src/takeover.h new file mode 100644 index 0000000..08ca095 --- /dev/null +++ b/src/takeover.h @@ -0,0 +1,21 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +void takeover_init(void); +void takeover_login(PgSocket *bouncer); + diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..920f0bc --- /dev/null +++ b/src/util.c @@ -0,0 +1,575 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * Random small utility functions + */ + +#include "bouncer.h" + +#include "md5.h" + +#ifdef HAVE_SYS_UCRED_H +#include +#endif + +void *zmalloc(size_t len) +{ + void *p = malloc(len); + if (p) + memset(p, 0, len); + return p; +} + +/* + * Safe string copy + */ +#ifndef HAVE_STRLCPY +size_t strlcpy(char *dst, const char *src, size_t n) +{ + size_t len = strlen(src); + if (len < n) { + memcpy(dst, src, len + 1); + } else if (n > 0) { + memcpy(dst, src, n - 1); + dst[n - 1] = 0; + } + return len; +} +#endif +#ifndef HAVE_STRLCAT +size_t strlcat(char *dst, const char *src, size_t n) +{ + size_t pos = 0; + while (pos < n && dst[pos]) + pos++; + if (pos < n) + return pos + strlcpy(dst + pos, src, n - pos); + return pos + strlen(src); +} +#endif + +/* + * Generic logging + */ + +static void render_time(char *buf, int max) +{ + struct tm tm; + struct timeval tv; + gettimeofday(&tv, NULL); + localtime_r(&tv.tv_sec, &tm); + strftime(buf, max, "%Y-%m-%d %H:%M:%S", &tm); +} + +static void _log_write(const char *pfx, const char *msg) +{ + char buf[1024]; + char tbuf[64]; + int len; + render_time(tbuf, sizeof(tbuf)); + len = snprintf(buf, sizeof(buf), "%s %u %s %s\n", + tbuf, (unsigned)getpid(), pfx, msg); + if (cf_logfile) { + int fd = open(cf_logfile, O_CREAT | O_APPEND | O_WRONLY, 0644); + if (fd > 0) { + safe_write(fd, buf, len); + safe_close(fd); + } + } + if (!cf_daemon) + fprintf(stderr, "%s", buf); +} + +static void _log(const char *pfx, const char *fmt, va_list ap) +{ + char buf[1024]; + vsnprintf(buf, sizeof(buf), fmt, ap); + _log_write(pfx, buf); +} + +void _fatal(const char *file, int line, const char *func, + const char *fmt, ...) +{ + va_list ap; + char buf[1024]; + + snprintf(buf, sizeof(buf), + "@%s:%d in function %s(): %s", + file, line, func, fmt); + + va_start(ap, fmt); + _log("FATAL", buf, ap); + va_end(ap); + if (cf_verbose > 2) + abort(); + exit(1); +} + +void _fatal_perror(const char *file, int line, const char *func, + const char *fmt, ...) 
+{ + va_list ap; + char buf[1024]; + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + _fatal(file, line, func, "%s: %s", buf, strerror(errno)); +} + +/* + * generic logging + */ +void log_level(const char *pfx, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + _log(pfx, fmt, ap); + va_end(ap); +} + +/* + * Logging about specific PgSocket + */ + +void +slog_level(const char *pfx, const PgSocket *sock, const char *fmt, ...) +{ + char buf1[1024]; + char buf2[1024]; + char *user, *db, *host; + int port; + va_list ap; + + db = sock->pool ? sock->pool->db->name : "(nodb)"; + user = sock->auth_user ? sock->auth_user->name : "(nouser)"; + if (sock->addr.is_unix) { + host = "unix"; + } else { + host = inet_ntoa(sock->addr.ip_addr); + } + port = sock->addr.port; + + va_start(ap, fmt); + vsnprintf(buf1, sizeof(buf1), fmt, ap); + va_end(ap); + + snprintf(buf2, sizeof(buf2), "%c: %s/%s@%s:%d %s", + is_server_socket(sock) ? 'S' : 'C', + db, user, host, port, buf1); + + _log_write(pfx, buf2); +} + + +/* + * Wrappers for read/write/recv/send that survive interruptions. 
+ */ + +int safe_read(int fd, void *buf, int len) +{ + int res; +loop: + res = read(fd, buf, len); + if (res < 0 && errno == EINTR) + goto loop; + return res; +} + +int safe_write(int fd, const void *buf, int len) +{ + int res; +loop: + res = write(fd, buf, len); + if (res < 0 && errno == EINTR) + goto loop; + return res; +} + +int safe_recv(int fd, void *buf, int len, int flags) +{ + int res; +loop: + res = recv(fd, buf, len, flags); + if (res < 0 && errno == EINTR) + goto loop; + if (res < 0) + log_noise("safe_recv(%d, %d) = %s", fd, len, strerror(errno)); + else if (cf_verbose > 2) + log_noise("safe_recv(%d, %d) = %d", fd, len, res); + return res; +} + +int safe_send(int fd, const void *buf, int len, int flags) +{ + int res; +loop: + res = send(fd, buf, len, flags); + if (res < 0 && errno == EINTR) + goto loop; + if (res < 0) + log_noise("safe_send(%d, %d) = %s", fd, len, strerror(errno)); + else if (cf_verbose > 2) + log_noise("safe_send(%d, %d) = %d", fd, len, res); + return res; +} + +int safe_close(int fd) +{ + int res; +loop: + /* by manpage, the close() could be interruptable + although it seems that at least in linux it cannot happen */ + res = close(fd); + if (res < 0 && errno == EINTR) + goto loop; + return res; +} + +int safe_recvmsg(int fd, struct msghdr *msg, int flags) +{ + int res; +loop: + res = recvmsg(fd, msg, flags); + if (res < 0 && errno == EINTR) + goto loop; + if (res < 0) + log_warning("safe_recvmsg(%d, msg, %d) = %s", fd, flags, strerror(errno)); + else if (cf_verbose > 2) + log_noise("safe_recvmsg(%d, msg, %d) = %d", fd, flags, res); + return res; +} + +int safe_sendmsg(int fd, const struct msghdr *msg, int flags) +{ + int res; + int msgerr_count = 0; +loop: + res = sendmsg(fd, msg, flags); + if (res < 0 && errno == EINTR) + goto loop; + + if (res < 0) { + log_warning("safe_sendmsg(%d, msg[%d,%d], %d) = %s", fd, + msg->msg_iov[0].iov_len, + msg->msg_controllen, + flags, strerror(errno)); + + /* with ancillary data pn blocking socket OSX 
returns + * EMSGSIZE instead of blocking. try to solve it by waiting */ + if (errno == EMSGSIZE && msgerr_count < 20) { + struct timeval tv = {1, 0}; + log_warning("trying to sleep a bit"); + select(0, NULL, NULL, NULL, &tv); + msgerr_count++; + goto loop; + } + } else if (cf_verbose > 2) + log_noise("safe_sendmsg(%d, msg, %d) = %d", fd, flags, res); + return res; +} + +/* + * Load a file into malloc()-ed C string. + */ + +char *load_file(const char *fn) +{ + struct stat st; + char *buf = NULL; + int res, fd; + + res = stat(fn, &st); + if (res < 0) { + log_error("%s: %s", fn, strerror(errno)); + goto load_error; + } + + buf = malloc(st.st_size + 1); + if (!buf) + goto load_error; + + if ((fd = open(fn, O_RDONLY)) < 0) { + log_error("%s: %s", fn, strerror(errno)); + goto load_error; + } + + if ((res = safe_read(fd, buf, st.st_size)) < 0) { + log_error("%s: %s", fn, strerror(errno)); + goto load_error; + } + + safe_close(fd); + buf[st.st_size] = 0; + + return buf; + +load_error: + if (buf != NULL) + free(buf); + return NULL; +} + +/* + * PostgreSQL MD5 "encryption". 
+ */ + +static void hash2hex(const uint8 *hash, char *dst) +{ + int i; + static const char hextbl [] = "0123456789abcdef"; + for (i = 0; i < MD5_DIGEST_LENGTH; i++) { + *dst++ = hextbl[hash[i] >> 4]; + *dst++ = hextbl[hash[i] & 15]; + } + *dst = 0; +} + +bool pg_md5_encrypt(const char *part1, + const char *part2, size_t part2len, + char *dest) +{ + MD5_CTX ctx; + uint8 hash[MD5_DIGEST_LENGTH]; + + MD5_Init(&ctx); + MD5_Update(&ctx, part1, strlen(part1)); + MD5_Update(&ctx, part2, part2len); + MD5_Final(hash, &ctx); + + memcpy(dest, "md5", 3); + hash2hex(hash, dest + 3); + + memset(hash, 0, sizeof(*hash)); + return true; +} + +/* wrapper for usable crypt() */ +const char *pg_crypt(const char *passwd, const char *salt) +{ + return crypt(passwd, salt); +} + +/* wrapped for getting random bytes */ +bool get_random_bytes(uint8 *dest, int len) +{ + int i; + for (i = 0; i < len; i++) + dest[i] = random() & 255; + return len; +} + +/* + * high-precision time + */ + +usec_t get_time_usec(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (usec_t)tv.tv_sec * USEC + tv.tv_usec; +} + +/* + * cache time, as we dont need sub-second precision + */ +static usec_t time_cache = 0; + +usec_t get_cached_time(void) +{ + if (!time_cache) + time_cache = get_time_usec(); + return time_cache; +} + +void reset_time_cache(void) +{ + time_cache = 0; +} + +/* + * get other side's uid. 
+ */ +bool get_unix_peer_uid(int fd, uid_t *uid_p) +{ + int res = -1; +#ifdef SO_PEERCRED + struct ucred cred; + socklen_t len = sizeof(cred); + res = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len); + if (res >= 0) + *uid_p = cred.uid; + else + log_error("getsockopt(SO_PEERCRED): %s", strerror(errno)); +#else /* !SO_PEERCRED */ +#ifdef LOCAL_PEERCRED + struct xucred cred; + socklen_t len = sizeof(cred); + res = getsockopt(fd, AF_UNIX, LOCAL_PEERCRED, &cred, &len); + if (res >= 0) + *uid_p = cred.cr_uid; + else + log_error("getsockopt(LOCAL_PEERCRED): %s", strerror(errno)); +#endif /* !LOCAL_PEERCRED */ +#endif /* !SO_PEERCRED */ + return (res >= 0); +} + +void socket_set_nonblocking(int fd, int val) +{ + int flags, res; + + /* get old flags */ + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) + fatal_perror("fcntl(F_GETFL)"); + + /* flip O_NONBLOCK */ + if (val) + flags |= O_NONBLOCK; + else + flags &= ~O_NONBLOCK; + + /* set new flags */ + res = fcntl(fd, F_SETFL, flags); + if (res < 0) + fatal_perror("fcntl(F_SETFL)"); +} + +/* set needed socket options */ +void tune_socket(int sock, bool is_unix) +{ + int res; + int val; + + /* close fd on exec */ + res = fcntl(sock, F_SETFD, FD_CLOEXEC); + if (res < 0) + fatal_perror("fcntl FD_CLOEXEC"); + + /* when no data avail, return EAGAIN instead blocking */ + socket_set_nonblocking(sock, 1); + +#ifdef SO_NOSIGPIPE + /* disallow SIGPIPE, if possible */ + val = 1; + res = setsockopt(sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt SO_NOSIGPIPE"); +#endif + + /* + * Following options are for network sockets + */ + if (is_unix) + return; + + /* the keepalive stuff needs some poking before enbling */ + if (cf_tcp_keepalive) { + /* turn on socket keepalive */ + val = 1; + res = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt SO_KEEPALIVE"); +#ifdef __linux__ + /* set count of keepalive packets */ + if (cf_tcp_keepcnt > 
0) { + val = cf_tcp_keepcnt; + res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt TCP_KEEPCNT"); + } + /* how lond the connection can stay idle before sending keepalive pkts */ + if (cf_tcp_keepidle) { + val = cf_tcp_keepidle; + res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt TCP_KEEPIDLE"); + } + /* time between packets */ + if (cf_tcp_keepintvl) { + val = cf_tcp_keepintvl; + res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt TCP_KEEPINTVL"); + } +#else +#ifdef TCP_KEEPALIVE + if (cf_tcp_keepidle) { + val = cf_tcp_keepidle; + res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt TCP_KEEPALIVE"); + } +#endif +#endif + } + + /* set in-kernel socket buffer size */ + if (cf_tcp_socket_buffer) { + val = cf_tcp_socket_buffer; + res = setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt SO_SNDBUF"); + val = cf_tcp_socket_buffer; + res = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt SO_RCVBUF"); + } + + /* + * Turn off kernel buffering, each send() will be one packet. 
+ */ + val = 1; + res = setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); + if (res < 0) + fatal_perror("setsockopt TCP_NODELAY"); +} + + +bool strlist_contains(const char *liststr, const char *str) +{ + int c, len = strlen(str); + const char *p = strstr(liststr, str); + + if (p == NULL) + return false; + + /* check if item start */ + if (p > liststr) { + c = *(p - 1); + if (!isspace(c) && c != ',') + return false; + } + + /* check if item end */ + c = p[len]; + if (c != 0 && !isspace(c) && c != ',') + return false; + + return true; +} + +const char *format_date(usec_t uval) +{ + static char buf[128]; + time_t tval = uval / USEC; + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M", localtime(&tval)); + return buf; +} + diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..359fa19 --- /dev/null +++ b/src/util.h @@ -0,0 +1,114 @@ +/* + * PgBouncer - Lightweight connection pooler for PostgreSQL. + * + * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * time tools + */ +typedef uint64_t usec_t; +usec_t get_cached_time(void); +void reset_time_cache(void); +usec_t get_time_usec(void); + +/* + * load file into malloced buffer + */ +char *load_file(const char *fn); + +void *zmalloc(size_t len); + +/* + * generic logging + */ +void log_level(const char *level, const char *s, ...); +#define log_error(args...) log_level("ERROR", ## args) +#define log_warning(args...) log_level("WARNING", ## args) +#define log_info(args...) log_level("LOG", ## args) +#define log_debug(args...) do { \ + if (cf_verbose > 0) log_level("DEBUG", ## args); \ + } while (0) +#define log_noise(args...) do { \ + if (cf_verbose > 1) log_level("NOISE", ## args); \ + } while (0) + + +/* + * logging about specific socket + */ +void slog_level(const char *level, const PgSocket *sock, const char *fmt, ...); +#define slog_error(sk, args...) slog_level("ERROR", sk, ## args) +#define slog_warning(sk, args...) slog_level("WARNING", sk, ## args) +#define slog_info(sk, args...) slog_level("LOG", sk, ## args) +#define slog_debug(sk, args...) do { \ + if (cf_verbose > 0) slog_level("DEBUG", sk, ## args); \ + } while (0) +#define slog_noise(sk, args...) do { \ + if (cf_verbose > 1) slog_level("NOISE", sk, ## args); \ + } while (0) + +/* + * log and exit + */ +void _fatal(const char *file, int line, const char *func, const char *s, ...); +void _fatal_perror(const char *file, int line, const char *func, const char *s, ...); +#define fatal(args...) \ + _fatal(__FILE__, __LINE__, __FUNCTION__, ## args) +#define fatal_perror(args...) 
\ + _fatal_perror(__FILE__, __LINE__, __FUNCTION__, ## args) + +/* + * non-interruptible operations + */ +int safe_read(int fd, void *buf, int len); +int safe_write(int fd, const void *buf, int len); +int safe_recv(int fd, void *buf, int len, int flags); +int safe_send(int fd, const void *buf, int len, int flags); +int safe_close(int fd); +int safe_recvmsg(int fd, struct msghdr *msg, int flags); +int safe_sendmsg(int fd, const struct msghdr *msg, int flags); + +/* + * password tools + */ +#define MD5_PASSWD_LEN 35 +#define isMD5(passwd) (memcmp(passwd, "md5", 3) == 0 \ + && strlen(passwd) == MD5_PASSWD_LEN) +bool pg_md5_encrypt(const char *part1, const char *part2, size_t p2len, char *dest); +const char *pg_crypt(const char *passwd, const char *salt); +bool get_random_bytes(uint8 *dest, int len); + +/* + * safe string copy + */ +#ifndef HAVE_STRLCPY +size_t strlcpy(char *dst, const char *src, size_t n); +#endif +#ifndef HAVE_STRLCAT +size_t strlcat(char *dst, const char *src, size_t n); +#endif + +/* + * socket option handling + */ +bool get_unix_peer_uid(int fd, uid_t *uid_p); +void socket_set_nonblocking(int fd, int val); +void tune_socket(int sock, bool is_unix); + +bool strlist_contains(const char *liststr, const char *str); + +const char *format_date(usec_t uval); + diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..f4e6294 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,14 @@ + +PGINC = -I$(shell pg_config --includedir) +PGLIB = -L$(shell pg_config --libdir) + +CFLAGS = -O2 -g -Wall $(PGINC) -I$(HOME)/src/libevent -I../src +LDFLAGS = $(PGLIB) -lpq -L$(HOME)/src/libevent/.libs -levent + +all: asynctest + +asynctest: asynctest.c + +clean: + rm -f asynctest + diff --git a/test/asynctest.c b/test/asynctest.c new file mode 100644 index 0000000..5d7a074 --- /dev/null +++ b/test/asynctest.c @@ -0,0 +1,264 @@ +/* + * Things to test: + * - Conn per query + * - show tx + * - long tx + * - variable-size query + */ + +#include +#include +#include 
+#include +#include +#include + +#include +#include + +#define Assert(e) do { if (!(e)) { \ + printf("Assert(%s) failed: %s:%d in %s\n", \ + #e, __FILE__, __LINE__, __FUNCTION__); \ + exit(1); } } while (0) + +typedef enum { false=0, true=1 } bool; + +#include "list.h" + +typedef struct DbConn { + List head; + const char *connstr; + struct event ev; + //time_t connect_time; + //unsigned query_count; + PGconn *con; + const char *query; +} DbConn; + +static LIST(idle_list); +static LIST(active_list); + +static DbConn *new_db(const char *connstr) +{ + DbConn *db = malloc(sizeof(*db)); + memset(db, 0, sizeof(*db)); + list_init(&db->head); + db->connstr = connstr; + return db; +} + +static void set_idle(DbConn *db) +{ + Assert(item_in_list(&db->head, &active_list)); + list_del(&db->head); + list_append(&db->head, &idle_list); +} + +static void set_active(DbConn *db) +{ + Assert(item_in_list(&db->head, &idle_list)); + list_del(&db->head); + list_append(&db->head, &active_list); +} + +/** some error happened */ +static void conn_error(DbConn *db, const char *desc) +{ + if (db->con) { + printf("libpq error in %s: %s\n", + desc, PQerrorMessage(db->con)); + PQfinish(db->con); + db->con = NULL; + } else { + printf("random error\n"); + } + set_idle(db); +} + +/** + * Connection has a resultset avalable, fetch it. + * + * Returns true if there may be more results coming, + * false if all done. + */ +static bool another_result(DbConn *db) +{ + PGresult *res; + + /* got one */ + res = PQgetResult(db->con); + if (res == NULL) { + set_idle(db); + if (1) { + PQfinish(db->con); + db->con = NULL; + } + return false; + } + + switch (PQresultStatus(res)) { + case PGRES_TUPLES_OK: + // todo: check result + case PGRES_COMMAND_OK: + PQclear(res); + break; + default: + PQclear(res); + conn_error(db, "weird result"); + return false; + } + return true; +} + +/** + * Called when select() told that conn is avail for reading/writing. 
+ * + * It should call postgres handlers and then change state if needed. + */ +static void result_cb(int sock, short flags, void *arg) +{ + DbConn *db = arg; + int res; + + res = PQconsumeInput(db->con); + if (res == 0) { + conn_error(db, "PQconsumeInput"); + return; + } + + /* loop until PQgetResult returns NULL */ + while (1) { + /* if PQisBusy, then incomplete result */ + if (PQisBusy(db->con)) { + event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db); + event_add(&db->ev, NULL); + break; + } + + /* got one */ + if (!another_result(db)) + break; + } +} + +static void send_cb(int sock, short flags, void *arg) +{ + int res; + DbConn *db = arg; + + res = PQflush(db->con); + if (res > 0) { + event_set(&db->ev, PQsocket(db->con), EV_WRITE, send_cb, db); + event_add(&db->ev, NULL); + } else if (res == 0) { + event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db); + event_add(&db->ev, NULL); + } else + conn_error(db, "PQflush"); +} + +/** send the query to server connection */ +static void send_query(DbConn *db) +{ + int res; + + /* send query */ + res = PQsendQueryParams(db->con, db->query, 0, + NULL, /* paramTypes */ + NULL, /* paramValues */ + NULL, /* paramLengths */ + NULL, /* paramFormats */ + 0); /* resultformat, 0-text, 1-bin */ + if (!res) { + conn_error(db, "PQsendQueryParams"); + return; + } + + /* flush it down */ + res = PQflush(db->con); + if (res > 0) { + event_set(&db->ev, PQsocket(db->con), EV_WRITE, send_cb, db); + event_add(&db->ev, NULL); + } else if (res == 0) { + event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db); + event_add(&db->ev, NULL); + } else + conn_error(db, "PQflush"); +} + +static void connect_cb(int sock, short flags, void *arg) +{ + DbConn *db = arg; + PostgresPollingStatusType poll_res; + + poll_res = PQconnectPoll(db->con); + switch (poll_res) { + case PGRES_POLLING_WRITING: + event_set(&db->ev, PQsocket(db->con), EV_WRITE, connect_cb, db); + event_add(&db->ev, NULL); + break; + case PGRES_POLLING_READING: 
+ event_set(&db->ev, PQsocket(db->con), EV_READ, connect_cb, db); + event_add(&db->ev, NULL); + break; + case PGRES_POLLING_OK: + send_query(db); + break; + case PGRES_POLLING_ACTIVE: + case PGRES_POLLING_FAILED: + conn_error(db, "PQconnectPoll"); + } +} + +static void launch_connect(DbConn *db) +{ + /* launch new connection */ + db->con = PQconnectStart(db->connstr); + if (db->con == NULL) { + conn_error(db, "PQconnectStart: no mem"); + return; + } + + if (PQstatus(db->con) == CONNECTION_BAD) { + conn_error(db, "PQconnectStart"); + return; + } + + event_set(&db->ev, PQsocket(db->con), EV_WRITE, connect_cb, db); + event_add(&db->ev, NULL); +} + +static void handle_idle(DbConn *db) +{ + set_active(db); + if (db->con) + send_query(db); + else + launch_connect(db); +} + +int main(void) +{ + int i; + DbConn *db; + List *item, *tmp; + + for (i = 0; i < 10; i++) { + db = new_db("dbname=marko port=6000 host=/tmp"); + db->query = "select 1"; + list_append(&db->head, &idle_list); + } + + event_init(); + + while (1) { + event_loop(EVLOOP_ONCE); + list_for_each_safe(item, &idle_list, tmp) { + db = container_of(item, DbConn, head); + handle_idle(db); + } + } + return 0; +} + + diff --git a/test/stress.py b/test/stress.py new file mode 100755 index 0000000..836cc79 --- /dev/null +++ b/test/stress.py @@ -0,0 +1,114 @@ +#! 
/usr/bin/env python + +import sys, os, re, time, psycopg +import threading, thread, random + +n_thread = 100 +longtx = 0 +tx_sleep = 0 +tx_sleep = 8 + +conn_data = { + 'dbname': 'marko', + #'host': '127.0.0.1', + 'host': '/tmp', + 'port': '6000', + 'user': 'marko', + #'password': '', + 'connect_timeout': '5', +} + +def get_connstr(): + tmp = [] + for k, v in conn_data.items(): + tmp.append(k+'='+v) + return " ".join(tmp) + +class WorkThread(threading.Thread): + def __init__(self): + threading.Thread.__init__(self) + self.setDaemon(True) + self.stat_lock = threading.Lock() + self.query_cnt = 0 + + def inc_cnt(self): + self.stat_lock.acquire() + self.query_cnt += 1 + self.stat_lock.release() + + def fetch_cnt(self): + self.stat_lock.acquire() + val = self.query_cnt + self.query_cnt = 0 + self.stat_lock.release() + return val + + def run(self): + try: + time.sleep(random.random() * 10.0) + except: pass + while 1: + try: + self.main_loop() + except KeyboardInterrupt: + break + except SystemExit: + break + except Exception, d: + print d + try: + time.sleep(5) + except: pass + + def main_loop(self): + db = psycopg.connect(get_connstr()) + if not longtx: + db.autocommit(1) + n = 0 + while n < 10: + self.do_work(db) + self.inc_cnt() + n += 1 + + def do_work(self, db): + curs = db.cursor() + q = "select pg_sleep(%.02f)" % (random.random() * 1) + curs.execute(q) + time.sleep(tx_sleep * random.random() + 1) + if longtx: + db.commit() + +def main(): + print "connstr", get_connstr() + + thread_list = [] + while len(thread_list) < n_thread: + t = WorkThread() + t.start() + thread_list.append(t) + + print "started %d threads" % len(thread_list) + + last = time.time() + while 1: + time.sleep(1) + now = time.time() + dur = now - last + if dur >= 5: + last = now + cnt = 0 + for t in thread_list: + cnt += t.fetch_cnt() + avg = cnt / dur + print "avg", avg + +if __name__ == '__main__': + try: + main() + except SystemExit: + pass + except KeyboardInterrupt: + pass + #except Exception, 
d: + # print d + + diff --git a/test/test.ini b/test/test.ini new file mode 100644 index 0000000..fa85fa7 --- /dev/null +++ b/test/test.ini @@ -0,0 +1,136 @@ +;; database name = connect string +[databases] + +p0 = port=6666 host=127.0.0.1 dbname=p0 user=bouncer pool_size=2 +p1 = port=6666 host=127.0.0.1 dbname=p1 user=bouncer +p2 = port=6668 host=127.0.0.1 dbname=p2 user=bouncer + +;; Configuration section +[pgbouncer] + +;;; +;;; Administrative settings +;;; + +logfile = test.log +pidfile = test.pid + +;;; +;;; Where to wait for clients +;;; + +; ip address or * which means all ip-s +listen_addr = 127.0.0.1 +listen_port = 6667 +unix_socket_dir = /tmp + +;;; +;;; Authentication settings +;;; + +; any, trust, plain, crypt, md5 +auth_type = trust +#auth_file = 8.0/main/global/pg_auth +auth_file = userlist.txt + +;;; +;;; Pooler personality questions +;;; + +; When server connection is released back to pool: +; session - after client disconnects +; transaction - after transaction finishes +; statement - after statement finishes +pool_mode = statement + +; When taking idle server into use, this query is run first. +; +; Query for session pooling: +; ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT +; Query for statement/transaction pooling: +; SELECT 1 +; Empty query disables the functionality +server_check_query = select 1 + +; If server was used more recently than this many seconds ago, +; skip the check query. If 0, the check query is always run. +server_check_delay = 10 + +;;; +;;; Connection limits +;;; + +; total number of clients that can connect +max_client_conn = 10 +default_pool_size = 5 + +;;; +;;; Timeouts +;;; + +; Close server connection if it has been connected longer than this. +server_lifetime = 120 + +; Close server connection if it has not been used in this time. +; Allows cleaning unnecessary connections from the pool after a peak. +server_idle_timeout = 60 + +; Cancel connection attempt if the server takes longer than this to answer. 
+server_connect_timeout = 15 + +; If server login failed (server_connect_timeout or auth failure) +; then wait this many seconds. +server_login_retry = 15 + +; Dangerous. Server connection is closed if query does not return +; in this time. Should be used to survive network problems, +; _not_ as statement_timeout. (default: 0) +query_timeout = 20 + +; Dangerous. Client connection is closed if no activity in this time. +; Should be used to survive network problems. (default: 0) +client_idle_timeout = 0 + + +;;; +;;; Low-level tuning options +;;; + +; buffer for streaming packets +pkt_buf = 2048 + +;;; +;;; networking options, for info: man 7 tcp +;;; + +; linux: notify program about new connection only if there +; is also data received. (Seconds to wait.) +tcp_defer_accept = 0 + +;; following options are reloadable, but apply only to +;; new connections. + +; in-kernel buffer size (linux default: 4096) +tcp_socket_buffer = 0 + +; whether tcp keepalive should be turned on (0/1) +tcp_keepalive = 0 + +;; following options are linux-specific. +;; they also require tcp_keepalive=1 + +; count of keepalive packets +tcp_keepcnt = 0 + +; how long the connection can be idle, +; before sending keepalive packets +tcp_keepidle = 0 + +; The time between individual keepalive probes. +tcp_keepintvl = 0 + +; By default, max tcp packet cannot be larger than pkt_buf. +; If this is set, then bouncer tells the kernel to queue packets. +; Then max pkt length is tcp_socket_buffer. 
+tcp_buffer_more = 0 + + diff --git a/test/test.sh b/test/test.sh new file mode 100755 index 0000000..ed2a4c1 --- /dev/null +++ b/test/test.sh @@ -0,0 +1,404 @@
#!/bin/sh

# Regression tests for pgbouncer.  Starts a private postgres on $PG_PORT and
# a bouncer configured by $BOUNCER_INI, runs each test function through
# runtest(), then shuts both down.
#
# Notes:
# - uses iptables and -F with some tests, probably not very friendly to your firewall
# - uses nc (netcat) with some tests, skips if not in path
# - assumes postgres 8.2; fix your path so that it comes first

export PATH=/usr/lib/postgresql/8.2/bin:$PATH
export PGDATA=$PWD/pgdata
export PGHOST=localhost
export PGPORT=6667
export EF_ALLOW_MALLOC_0=1

BOUNCER_LOG=test.log
BOUNCER_INI=test.ini
BOUNCER_PID=test.pid
# listen_port value taken from the ini file
BOUNCER_PORT=`sed -n '/^listen_port/s/listen_port.*=[^0-9]*//p' $BOUNCER_INI`
BOUNCER_EXE=./pgbouncer

LOGDIR=log
NC_PORT=6668
PG_PORT=6666
PG_LOG=$LOGDIR/pg.log

# pgctl ARGS... - run pg_ctl against the test cluster, appending output to $PG_LOG
pgctl() {
	pg_ctl -o "-p $PG_PORT" -D $PGDATA "$@" >>$PG_LOG 2>&1
}

mkdir -p $LOGDIR
rm -f $BOUNCER_LOG $PG_LOG
# rm -r $PGDATA

# create the cluster on first run
if [ ! -d $PGDATA ]; then
	mkdir $PGDATA
	initdb >/dev/null 2>&1
fi

pgctl start
sleep 5

# create the test user and databases unless p0 is already listed
psql -p $PG_PORT -l |grep p0 > /dev/null || {
	psql -p $PG_PORT -c "create user bouncer" template1
	createdb -p $PG_PORT p0
	createdb -p $PG_PORT p1
}

$BOUNCER_EXE -d $BOUNCER_INI
sleep 1

#
# fw hacks
#

# fw_drop_port PORT - add a firewall rule that drops outgoing tcp to PORT
fw_drop_port() {
	case `uname` in
	Linux)
		sudo iptables -A OUTPUT -p tcp --dport $1 -j DROP;;
	Darwin)
		sudo ipfw add 100 drop tcp from any to 127.0.0.1 dst-port $1;;
	*)
		echo "Unknown OS";;
	esac
}

# fw_reject_port PORT - add a firewall rule that answers tcp to PORT with a reset
fw_reject_port() {
	case `uname` in
	Linux)
		sudo iptables -A OUTPUT -p tcp --dport $1 -j REJECT --reject-with tcp-reset;;
	Darwin)
		sudo ipfw add 100 reset tcp from any to 127.0.0.1 dst-port $1;;
	*)
		echo "Unknown OS";;
	esac
}

# fw_reset - remove the rules added above (on Linux this flushes ALL rules)
fw_reset() {
	case `uname` in
	Linux)
		sudo iptables -F;;
	Darwin)
		sudo ipfw del 100;;
	*)
		echo "Unknown OS"; exit 1;;
	esac
}

#
# util functions
#

# complete - stop the bouncer (if its pidfile exists) and postgres
complete() {
	test -f $BOUNCER_PID && kill `cat $BOUNCER_PID` >/dev/null 2>&1
	pgctl -m fast stop
	rm -f $BOUNCER_PID
}

# die MSG... - print MSG, shut everything down and exit with status 1
die() {
	echo "$@"
	complete
	exit 1
}

# admin CMD - run CMD on the bouncer admin console; abort the run on failure
admin() {
	psql -h /tmp -U pgbouncer pgbouncer -c "$*;" || die "Cannot contact bouncer!"
}

# runtest NAME - run test function NAME with output in $LOGDIR/NAME.log and
# print SUCCESS or FAILED according to its exit status
runtest() {
	# printf instead of "echo -n", which is not portable under /bin/sh
	printf '%s running %s ... ' "`date`" "$1"
	eval $1 >$LOGDIR/$1.log 2>&1
	if [ $? -eq 0 ]; then
		echo "SUCCESS"
	else
		echo "FAILED"
	fi
	date >> $LOGDIR/$1.log

	# allow background processing to complete
	wait
	# start with fresh config
	kill -HUP `cat $BOUNCER_PID`
}

# server_lifetime: passes when no 'bouncer' connection to p0 is left on the
# server 3 seconds after a query, with server_lifetime=2
test_server_lifetime() {
	admin "set server_lifetime=2"
	psql -c "select now()" p0
	sleep 3

	rc=`psql -p $PG_PORT -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='p0'" p0`
	psql -c "select now()" p0
	# compare explicitly: an empty $rc (count query failed) must not pass
	test "$rc" = "0"
}

# server_idle_timeout: same check as above with server_idle_timeout=2
test_server_idle_timeout() {
	admin "set server_idle_timeout=2"
	psql -c "select now()" p0
	sleep 3
	rc=`psql -p $PG_PORT -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='p0'" p0`
	psql -c "select now()" p0
	# compare explicitly: an empty $rc (count query failed) must not pass
	test "$rc" = "0"
}

# query_timeout: a 5s query must fail with query_timeout=3
test_query_timeout() {
	admin "set query_timeout=3"
	psql -c "select pg_sleep(5)" p0 && return 1
	return 0
}

# client_idle_timeout: a session idle for 3s must fail with client_idle_timeout=2
test_client_idle_timeout() {
	admin "set client_idle_timeout=2"
	psql --set ON_ERROR_STOP=1 p0 <<-PSQL_EOF
		select now();
		\! sleep 3
		select now();
	PSQL_EOF
	test $? -eq 0 && return 1
	return 0
}

# server_login_retry: a query issued while postgres is being restarted in the
# background must still succeed
test_server_login_retry() {
	admin "set query_timeout=10"
	admin "set server_login_retry=1"

	(pgctl -m fast stop; sleep 3; pgctl start) &
	sleep 1
	psql -c "select now()" p0
	rc=$?
	wait
	return $rc
}

# server_connect_timeout - uses netcat to start dummy server
test_server_connect_timeout_establish() {
	which nc >/dev/null || return 1

	nc -l -p $NC_PORT >/dev/null &
	admin "set query_timeout=3"
	admin "set server_connect_timeout=2"
	psql -c "select now()" p2
	# client will always see query_timeout, need to grep for connect timeout
	grep "closing because: connect timeout" $BOUNCER_LOG
	# remember the grep result; it is the test verdict, not killall's status
	rc=$?
	# didnt seem to die otherwise
	killall nc
	return $rc
}

# server_connect_timeout - block with iptables
# XXX: for some reason bouncer says 'connect failed' not 'connect timeout'
test_server_connect_timeout_reject() {
	test -z "$CAN_SUDO" && return 1
	admin "set query_timeout=5"
	admin "set server_connect_timeout=3"
	fw_drop_port $PG_PORT
	psql -c "select now()" p0
	fw_reset
	# client will always see query_timeout, need to grep for connect timeout
	grep "closing because: connect failed" $BOUNCER_LOG
}

# server_check_delay: a query started while the server port is rejected must
# return "1" once the firewall rules are removed
test_server_check_delay() {
	test -z "$CAN_SUDO" && return 1

	admin "set server_check_delay=2"
	admin "set server_login_retry=3"
	admin "set query_timeout=10"

	psql p0 -c "select now()"
	fw_reject_port $PG_PORT
	sleep 3
	psql -tAq p0 -c "select 1" >$LOGDIR/test.tmp &
	sleep 1
	fw_reset
	echo `date` rules flushed
	wait
	echo `date` done waiting

	test "`cat $LOGDIR/test.tmp`" = "1"
}

# max_client_conn: with a limit of 5, the 5th client is accepted and the 6th
# is refused; connecting works again after the others finish
test_max_client_conn() {
	admin "set max_client_conn=5"
	admin "show config"

	for i in `seq 1 4`; do
		psql p1 -c "select now() as sleeping from pg_sleep(3);" &
	done

	# last conn allowed
	psql p1 -c "select now() as last_conn" || return 1

	# exhaust it
	psql p1 -c "select now() as sleeping from pg_sleep(3);" &
	sleep 1

	# shouldn't be allowed
	psql p1 -c "select now() as exhausted" && return 1

	# should be ok
	echo 'waiting for clients to complete ...'
	wait
	psql p1 -c "select now() as ok" || return 1

	return 0
}

# - max pool size
test_pool_size() {

	# docount DB - run 10 parallel clients against DB, then print the number
	# of 'bouncer' connections to DB that the server reports
	docount() {
		for i in `seq 10`; do
			# send client output to stderr so that only the count
			# reaches the caller's command substitution
			psql $1 -c "select pg_sleep(0.5)" >&2 &
		done
		wait
		cnt=`psql -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='$1'" $1`
		echo $cnt
	}

	# p0 has pool_size=2 in the ini, p1 uses default_pool_size=5
	test "`docount p0`" = "2" || return 1
	test "`docount p1`" = "5" || return 1

	return 0
}

# test online restart while clients running
test_online_restart() {
	for i in `seq 1 5`; do
		for j in `seq 1 10`; do
			psql -c "select now() as sleeping from pg_sleep(0.2)" p0 &
		done

		pid1=`cat $BOUNCER_PID`
		echo "old bouncer is $pid1"
		$BOUNCER_EXE -d -R $BOUNCER_INI
		sleep 2
		pid2=`cat $BOUNCER_PID`
		echo "new bouncer is $pid2"
		# the pidfile must now name a different process
		[ "$pid1" = "$pid2" ] && return 1
	done
	return 0
}

# test pause/resume: all 50 background queries must complete
test_pause_resume() {
	rm -f $LOGDIR/test.tmp
	for i in `seq 1 50`; do
		psql -tAq p0 -c 'select 1 from pg_sleep(0.1)' >>$LOGDIR/test.tmp
	done &

	for i in `seq 1 5`; do
		admin "pause"
		sleep 1
		admin "resume"
		sleep 1
	done

	wait
	test `wc -l <$LOGDIR/test.tmp` -eq 50
}

# test suspend/resume: all 50 background queries must complete
test_suspend_resume() {
	rm -f $LOGDIR/test.tmp
	for i in `seq 1 50`; do
		psql -tAq p0 -c 'select 1 from pg_sleep(0.1)' >>$LOGDIR/test.tmp
	done &

	for i in `seq 1 5`; do
		psql -h /tmp -p $BOUNCER_PORT pgbouncer -U pgbouncer <<-PSQL_EOF
			suspend;
			\! sleep 1
			resume;
			\! sleep 1
		PSQL_EOF
	done

	wait
	test `wc -l <$LOGDIR/test.tmp` -eq 50
}

# test pool database restart
test_database_restart() {
	admin "set server_login_retry=1"

	psql p0 -c "select now() as p0_before_restart"
	pgctl -m fast restart
	echo `date` restart 1
	psql p0 -c "select now() as p0_after_restart" || return 1


	# do with some more clients
	for i in `seq 1 5`; do
		psql p0 -c "select pg_sleep($i)" &
		psql p1 -c "select pg_sleep($i)" &
	done

	pgctl -m fast restart
	echo `date` restart 2

	wait
	psql p0 -c "select now() as p0_after_restart" || return 1
}

# test connect string change: after pointing p1 at dbname=p0 in the ini and
# sending HUP, clients of p1 must land in database p0
test_database_change() {
	admin "set server_lifetime=2"

	db1=`psql -tAq p1 -c "select current_database()"`

	cp test.ini test.ini.bak
	sed 's/\(p1 = port=6666 host=127.0.0.1 dbname=\)\(p1\)/\1p0/g' test.ini >test2.ini
	mv test2.ini test.ini

	kill -HUP `cat $BOUNCER_PID`

	sleep 3
	db2=`psql -tAq p1 -c "select current_database()"`

	echo "db1=$db1 db2=$db2"
	cp test.ini.bak test.ini
	rm test.ini.bak

	admin "show databases"
	admin "show pools"

	# quoted so that an empty result fails cleanly
	test "$db1" = "p1" -a "$db2" = "p0"
}

echo "Testing for sudo access."
sudo true && CAN_SUDO=1

testlist="
test_server_login_retry
test_client_idle_timeout
test_server_lifetime
test_server_idle_timeout
test_query_timeout
test_server_connect_timeout_establish
test_server_connect_timeout_reject
test_server_check_delay
test_max_client_conn
test_pool_size
test_online_restart
test_pause_resume
test_suspend_resume
test_database_restart
test_database_change
"

# test names given on the command line replace the default list
if [ $# -gt 0 ]; then
	testlist="$*"
fi

for test in $testlist
do
	runtest $test
done

complete

diff --git a/test/userlist.txt b/test/userlist.txt new file mode 100644 index 0000000..166f46d --- /dev/null +++ b/test/userlist.txt @@ -0,0 +1,3 @@ +"marko" "asdasd" +"postgres" "asdasd" +"pgbouncer" "fake"