--- /dev/null
+
+Marko Kreen <marko.kreen@skype.net> - main coder
+Martin Pihlak <martin.pihlak@skype.net> - head inquisitor
+
--- /dev/null
+PgBouncer - Lightweight connection pooler for PostgreSQL.
+
+Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
--- /dev/null
+
+# sources
+# File names are relative to src/; the directory prefix is added further
+# down with addprefix when the full-path lists are computed.
+SRCS = client.c loader.c objects.c pooler.c proto.c sbuf.c server.c util.c \
+	admin.c stats.c takeover.c md5.c janitor.c pktbuf.c main.c
+HDRS = client.h loader.h objects.h pooler.h proto.h sbuf.h server.h util.h \
+	admin.h stats.h takeover.h md5.h janitor.h pktbuf.h bouncer.h \
+	list.h mbuf.h system.h
+
+# data & dirs to include in tgz
+# (both lists become part of DISTFILES, which the tgz target copies)
+DATA = README etc/pgbouncer.ini Makefile config.mak.in config.h.in \
+	configure configure.ac debian/packages debian/changelog
+DIRS = etc src debian
+
+# keep autoconf stuff separate
+# The leading '-' keeps make quiet when config.mak does not exist yet;
+# targets that need it list config.mak as a prerequisite instead.
+-include config.mak
+
+# calculate full-path values
+# srcdir, builddir, PACKAGE_TARNAME and PACKAGE_VERSION are set in config.mak.
+# Objects are placed under $(builddir)/lib, sources are read from $(srcdir)/src.
+OBJS = $(SRCS:.c=.o)
+hdrs = $(addprefix $(srcdir)/src/, $(HDRS))
+srcs = $(addprefix $(srcdir)/src/, $(SRCS))
+objs = $(addprefix $(builddir)/lib/, $(OBJS))
+FULL = $(PACKAGE_TARNAME)-$(PACKAGE_VERSION)
+DISTFILES = $(DIRS) $(DATA) $(srcs) $(hdrs)
+
+# Verbosity switch: quiet unless overridden, 'make V=1' shows commands.
+# Q is put in front of the real command lines; E prints the short
+# progress tag in quiet mode and is a no-op otherwise.
+V=0
+ifneq ($(V), 0)
+Q =
+E = @true
+else
+Q = @
+E = @echo
+endif
+
+## actual targets now ##
+
+# default target
+# Command-style targets are declared phony so that a stray file or
+# directory with the same name (e.g. 'install' or 'check') can never make
+# them look up to date.  'tags' is not listed because it is a real file.
+.PHONY: all install tgz deb clean distclean realclean boot check
+all: pgbouncer
+
+# final executable
+# Links all objects into ./pgbouncer.  config.mak is a prerequisite so its
+# rule can stop the build with a hint when ./configure has not been run.
+pgbouncer: config.mak $(objs)
+	$(E) " LD" $@
+	$(Q) $(CC) -o $@ $(LDFLAGS) $(objs) $(LIBS)
+
+# objects depend on all the headers
+# There is no per-file dependency tracking, so touching any header
+# recompiles every object.  The output directory is created on demand.
+$(builddir)/lib/%.o: $(srcdir)/src/%.c config.mak $(hdrs)
+	@mkdir -p $(builddir)/lib
+	$(E) " CC" $<
+	$(Q) $(CC) -c -o $@ $< $(DEFS) $(CFLAGS) $(CPPFLAGS)
+
+# install binary and other stuff
+# Honours DESTDIR for staged installs.  The binary goes to bindir using
+# BININSTALL, the sample pgbouncer.ini goes to docdir using INSTALL; both
+# commands are set in config.mak.
+install: pgbouncer
+	mkdir -p $(DESTDIR)$(bindir)
+	mkdir -p $(DESTDIR)$(docdir)
+	$(BININSTALL) -m 755 pgbouncer $(DESTDIR)$(bindir)
+	$(INSTALL) -m 644 $(srcdir)/etc/pgbouncer.ini $(DESTDIR)$(docdir)
+
+# create tarfile
+# Copies every entry of DISTFILES into a scratch directory named $(FULL)
+# with cpio pass-through mode, packs that directory as $(FULL).tgz and
+# removes the scratch directory again.
+tgz: config.mak $(DISTFILES)
+	rm -rf $(FULL) $(FULL).tgz
+	mkdir $(FULL)
+	(for f in $(DISTFILES); do echo $$f; done) | cpio -p $(FULL)
+	tar czf $(FULL).tgz $(FULL)
+	rm -rf $(FULL)
+
+# create debian package
+# Requires a generated ./configure (checked by the configure:: rule).
+# NOTE(review): 'yada rebuild' presumably regenerates the debian/ control
+# files from debian/packages - confirm against the yada documentation.
+# debuild is run with -uc -us -b.
+deb: configure
+	yada rebuild
+	debuild -uc -us -b
+
+# clean object files
+# Also removes editor backups (*~), the pgbouncer binary and core files.
+clean:
+	rm -f *~ src/*~ *.o src/*.o lib/*.o pgbouncer core core.*
+
+# clean configure results
+# Runs 'clean' first, then removes config.h, config.log, config.status and
+# config.mak, the lib directory and the autom4te* entries.
+distclean: clean
+	rm -f config.h config.log config.status config.mak
+	rm -rf lib autom4te*
+
+# clean autoconf results
+# Runs 'distclean' first, then removes the autoreconf-generated files
+# (including configure itself) and the tags file.
+realclean: distclean
+	rm -f aclocal* config.h.in configure depcomp install-sh missing
+	rm -f tags
+
+# generate configure script and config.h.in
+# Starts from a distclean tree, runs autoreconf with -i -f and afterwards
+# removes the autom4te* entries and the config.h.in~ backup.
+boot: distclean
+	autoreconf -i -f
+	rm -rf autom4te* config.h.in~
+
+# targets can depend on this to force ./configure
+# Double-colon rule without prerequisites, so the recipe runs every time a
+# target that depends on config.mak is built.  Nothing is generated here:
+# the recipe only prints a hint and fails when configure or config.mak
+# is missing.
+config.mak::
+	@test -f configure || { \
+		echo "Please run 'make boot && ./configure' first.";exit 1;}
+	@test -f $@ || { echo "Please run ./configure first.";exit 1;}
+
+# targets can depend on this to force 'make boot'
+# Same pattern as above: checks that the configure script exists and
+# fails with a hint otherwise.
+configure::
+	@test -f $@ || { echo "Please run 'make boot' first.";exit 1;}
+
+# create tags file
+# Always indexes our own sources and headers; when a libevent checkout
+# sits next to this tree (../libevent/event.h exists) its *.c and *.h
+# files are indexed as well.
+tags: $(srcs) $(hdrs)
+	extra=; \
+	if test -f ../libevent/event.h; then extra='../libevent/*.[ch]'; fi; \
+	ctags $(srcs) $(hdrs) $$extra
+
+# fixes for macos
+SPARSE_MACOS=-D__STDC_VERSION__=199901 -D__LP64__=0 -DSENDFILE=1 \
+	-I/usr/lib/gcc/i486-linux-gnu/4.1.2/include
+# sparse does not have any identity,
+# so the usual compiler-provided macros are defined by hand here
+SPARSE_FLAGS=-D__LITTLE_ENDIAN__ -D__i386__ -D__GNUC__=3 -D__GNUC_MINOR__=0 \
+	-Wno-transparent-union \
+	-Wall $(SPARSE_MACOS) $(CPPFLAGS) $(DEFS)
+
+# run sparse over code
+# All source files are passed to a single sparse invocation.
+check: config.mak
+	$(E) " CHECK" $(srcs)
+	$(Q) sparse $(SPARSE_FLAGS) $(srcs)
+
--- /dev/null
+
+PgBouncer
+=========
+
+Lightweight connection pooler for PostgreSQL.
+
+Docs: http://developer.skype.com/SkypeGarage/DbProjects/PgBouncer
+Source: http://pgfoundry.org/projects/pgbouncer
+
+Building
+---------
+
+PgBouncer uses libevent for low-level socket handling. When this is
+installed just run:
+
+ $ ./configure --prefix=/usr/local --with-libevent=/prefix
+ $ make
+ $ make install
+
+If the OS does not have libevent available as a package, it can be
+downloaded from http://monkey.org/~provos/libevent/
+
--- /dev/null
+# Template for config.mak: configure replaces every @NAME@ placeholder with
+# the value it detected, and the main Makefile includes the result.
+
+# package identity, used to name the release tarball
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+
+# compiler, preprocessor and linker settings
+DEFS = @DEFS@
+LIBS = @LIBS@
+CC = @CC@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CFLAGS = @CFLAGS@
+LDFLAGS = @LDFLAGS@
+
+# installation directories
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+bindir = @bindir@
+datarootdir = @datarootdir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+docdir = @docdir@
+
+# source and build tree locations
+top_builddir = @top_builddir@
+srcdir = @srcdir@
+abs_srcdir = @abs_srcdir@
+top_srcdir = @top_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+builddir = @builddir@
+abs_builddir = @abs_builddir@
+abs_top_builddir = @abs_top_builddir@
+
+# autoconf does not want to find 'install'
+# when automake is not in use, so configure.ac sets these by hand
+INSTALL = @INSTALL@
+BININSTALL = @BININSTALL@
+
--- /dev/null
+dnl Process this file with autoconf to produce a configure script.
+
+dnl Package name and version; src/bouncer.h is the file used to verify
+dnl that configure runs against the right source tree, and the detected
+dnl feature macros are written to config.h.
+AC_INIT(pgbouncer, 1.0)
+AC_CONFIG_SRCDIR(src/bouncer.h)
+AC_CONFIG_HEADER(config.h)
+
+dnl Checks for programs.
+AC_PROG_CC
+AC_PROG_CPP
+
+dnl Additional gcc tuning
+dnl Each optional warning flag is test-compiled on its own, on top of the
+dnl flags accepted so far; only flags the compiler accepts are kept.
+dnl -Wall is always kept.  The final list is appended to the user's CFLAGS.
+if test x"$GCC" = xyes; then
+  AC_MSG_CHECKING([for working warning switches])
+  good_CFLAGS="$CFLAGS"
+  good="-Wall"
+  flags="-Wextra"
+  # turn off noise from Wextra
+  flags="$flags -Wno-unused-parameter -Wno-sign-compare"
+  flags="$flags -Wno-missing-field-initializers"
+  # Wextra does not turn those on?
+  flags="$flags -Wmissing-prototypes -Wpointer-arith -Wendif-labels"
+  flags="$flags -Wdeclaration-after-statement -Wold-style-definition"
+  flags="$flags -Wstrict-prototypes"
+  for f in $flags; do
+    CFLAGS="$good_CFLAGS $good $f"
+    dnl the test program must be wrapped in AC_LANG_SOURCE
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE([[void foo(void){}]])], [good="$good $f"])
+  done
+  CFLAGS="$good_CFLAGS $good"
+  AC_MSG_RESULT([$good])
+fi
+
+dnl Checks for header files.
+dnl These are optional: each header found defines its HAVE_..._H macro.
+AC_CHECK_HEADERS([crypt.h sys/socket.h sys/ucred.h])
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_INLINE
+AC_TYPE_PID_T
+AC_TYPE_SIZE_T
+AC_TYPE_UINT8_T
+AC_TYPE_UINT32_T
+AC_TYPE_UINT64_T
+AC_SYS_LARGEFILE
+
+dnl Checks for library functions.
+dnl strlcpy/strlcat are optional; crypt() is mandatory and is searched for
+dnl in the default libraries first and then in -lcrypt.
+AC_CHECK_FUNCS(strlcpy strlcat)
+AC_SEARCH_LIBS(crypt, crypt, [], AC_MSG_ERROR([crypt not found]))
+
+dnl Find libevent
+dnl --with-libevent=PREFIX adds PREFIX/include and PREFIX/lib to the search
+dnl paths, --without-libevent is rejected.  A small program using
+dnl event_init() and event_set() is then linked against -levent.
+AC_MSG_CHECKING([for libevent])
+AC_ARG_WITH(libevent,
+  AC_HELP_STRING([--with-libevent=prefix],[Specify where libevent is installed]),
+  [ if test "$withval" = "no"; then
+      AC_MSG_ERROR([cannot work without libevent])
+    fi
+    # a bare --with-libevent sets withval to yes, which is not a directory
+    if test "$withval" != "yes"; then
+      CPPFLAGS="$CPPFLAGS -I$withval/include"
+      LDFLAGS="$LDFLAGS -L$withval/lib"
+    fi ])
+LIBS="$LIBS -levent"
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+  #include <sys/types.h>
+  #include <sys/time.h>
+  #include <stdio.h>
+  #include <event.h>
+  int main(void) {
+    struct event ev;
+    event_init();
+    event_set(&ev, 1, EV_READ, NULL, NULL);
+    return 0;
+  } ]])],
+[AC_MSG_RESULT([found])],
+[AC_MSG_ERROR([not found])])
+
+# autoconf does not want to find 'install', if not using automake...
+INSTALL=install
+
+dnl --enable-debug: adds -g to LDFLAGS, lowers -O2 to -O in CFLAGS and
+dnl installs the binary unstripped.  Without it the binary is installed
+dnl with 'install -s' and, under gcc, built with -fomit-frame-pointer.
+AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug],[build binary with debugging symbols]))
+AC_MSG_CHECKING([whether to build debug binary])
+if test "$enable_debug" = "yes"; then
+  LDFLAGS="-g $LDFLAGS"
+  CFLAGS="`echo $CFLAGS | sed -e 's/-O2/-O/g'`"
+  BININSTALL="$INSTALL"
+  AC_MSG_RESULT([yes])
+else
+  if test x"$GCC" = xyes; then
+    CFLAGS="$CFLAGS -fomit-frame-pointer"
+  fi
+  BININSTALL="$INSTALL -s"
+  AC_MSG_RESULT([no])
+fi
+AC_SUBST(INSTALL)
+AC_SUBST(BININSTALL)
+
+dnl --enable-cassert: defines CASSERT in config.h
+AC_ARG_ENABLE(cassert, AC_HELP_STRING([--enable-cassert],[turn on assert checking in code]))
+AC_MSG_CHECKING([whether to enable asserts])
+if test "$enable_cassert" = "yes"; then
+  AC_DEFINE(CASSERT, 1, [Define to enable assert checking])
+  AC_MSG_RESULT([yes])
+else
+  AC_MSG_RESULT([no])
+fi
+
+dnl --enable-werror: appends -Werror to CFLAGS.  This comes after the
+dnl compile and link probes above, so it does not affect them.
+AC_ARG_ENABLE(werror, AC_HELP_STRING([--enable-werror],[add -Werror to CFLAGS]))
+AC_MSG_CHECKING([whether to fail on warnings])
+if test "$enable_werror" = "yes"; then
+  CFLAGS="$CFLAGS -Werror"
+  AC_MSG_RESULT([yes])
+else
+  AC_MSG_RESULT([no])
+fi
+
+dnl Output findings
+dnl config.mak is generated from config.mak.in.  AC_CONFIG_FILES plus a bare
+dnl AC_OUTPUT replaces the obsolete AC_OUTPUT([file]) form.
+AC_CONFIG_FILES([config.mak])
+AC_OUTPUT
+
+dnl If separate build dir, link Makefile over
+dnl (srcdir is quoted so paths with spaces survive, and the link name is
+dnl spelled out because not every ln accepts the one-argument form)
+test -f Makefile || {
+  echo "Linking Makefile"
+  ln -s "$srcdir/Makefile" Makefile
+}
+
--- /dev/null
+pgbouncer (1.0) unstable; urgency=low
+
+ * Public release.
+
+ -- Marko Kreen <marko.kreen@skype.net> Tue, 13 Mar 2007 17:30:02 +0200
--- /dev/null
+## debian/packages for pgbouncer
+
+Source: pgbouncer
+Section: contrib/misc
+Priority: extra
+Maintainer: Marko Kreen <marko.kreen@skype.net>
+Standards-Version: 3.6.2
+Description: Lightweight connection pooler for PostgreSQL
+Copyright: BSD
+ Copyright 2007 Marko Kreen, Skype Technologies
+Build: sh
+ CPPFLAGS="-I$HOME/src/libevent" \
+ LDFLAGS="-L$HOME/src/libevent/.libs" \
+ ./configure --prefix=/usr --enable-debug --enable-cassert
+ make V=1
+Clean: sh
+ make clean || true
+#Build-Depends: libevent-dev
+
+Package: pgbouncer
+Architecture: any
+Contains: unstripped
+Depends: []
+Description: Lightweight connection pooler for PostgreSQL
+ .
+Install: sh
+ make install DESTDIR=$ROOT
--- /dev/null
+
+wiki = https://developer.skype.com/SkypeGarage/DbProjects/PgBouncer
+web = mkz@shell.pgfoundry.org:/home/pgfoundry.org/groups/pgbouncer/htdocs/
+
+# Neither target produces a file of its own name, so both are phony.
+.PHONY: all upload
+
+# nothing to build by default
+all:
+
+# run devupload.sh once per text file, passing the wiki URL it belongs to
+upload:
+	devupload.sh overview.txt $(wiki)
+	devupload.sh todo.txt $(wiki)/ToDo
+	devupload.sh usage.txt $(wiki)/UsageInfo
+	devupload.sh config.txt $(wiki)/ConfigFile
+
--- /dev/null
+#pragma section-numbers 2
+
+= PgBouncer Config =
+
+[[TableOfContents]]
+
+Config file is in "ini" format. Section names are between "[" and "]".
+Lines starting with ";" or "#" are taken as comment and ignored. The characters
+";" and "#" are not recognized when they appear later in the line.
+
+
+
+== Section [pgbouncer] ==
+
+=== Generic settings ===
+
+==== logfile ====
+Specifies log file. Logging is done by open/write/close, so it can be safely
+rotated, without informing pooler.
+
+Default: not set.
+
+
+==== pidfile ====
+Specifies pid file. Without pidfile, the daemonization is not allowed.
+
+Default: not set.
+
+
+==== listen_addr ====
+Specifies IPv4 address, where to listen for TCP connections. Or "*"
+meaning "listen on all addresses". When not set, only unix socket
+connections are allowed.
+
+Default: not set.
+
+==== listen_port ====
+On which port to listen on. Applies to both TCP and Unix sockets.
+
+Default: 6000
+
+==== unix_socket_dir ====
+Specifies location for Unix sockets. Applies to both listening socket
+and server connections. If set to empty string, Unix sockets are disabled.
+
+Default: /tmp
+
+==== auth_file ====
+
+Load user names and passwords from this file. The file format is the
+same as for the PostgreSQL pg_auth/pg_pwd file, so this can be pointed
+directly to the backend file.
+
+Default: not set.
+
+==== auth_type ====
+How to authenticate users.
+
+ md5::
+ Use MD5-based password check. auth_file may contain both md5-encrypted
+ or plain-text passwords.
+
+ crypt::
+ Use crypt(3) based password check. auth_file must contain plain-text
+ passwords.
+
+ plain::
+ Clear-text password is sent over wire.
+
+ trust::
+ No authentication is done. Username must still exist in auth_file.
+
+ any::
+ Like `trust` but username given is ignored. Requires that all databases
+ are configured to log in as specific user.
+
+Default: md5
+
+==== pool_mode ====
+Specifies when server connection is tagged as reusable for other clients.
+
+ session::
+ Server is released back to pool after client disconnects.
+
+ transaction::
+ Server is released back to pool after transaction finishes.
+
+ statement::
+ Server is released back to pool after query finishes. Long transactions
+ spanning multiple statements are disallowed in this mode.
+
+Default: `session`.
+
+==== max_client_conn ====
+
+Maximum number of client connections allowed.
+
+==== default_pool_size ====
+
+How many server connections to allow per user/database pair.
+Can be overridden in per-database config.
+
+=== Console access control ===
+
+==== admin_users ====
+List of users that are allowed to run all commands on console.
+
+==== stats_users ====
+
+List of users that are allowed to run read-only queries on console.
+That means all SHOW commands except SHOW FDS.
+
+=== Connection sanity checks, timeouts ===
+
+==== server_check_delay ====
+
+How long to keep a released server connection immediately available, without
+running a sanity-check query on it. If 0 then the query is always run.
+
+==== server_check_query ====
+
+Good variants are `SELECT 1;`, to just see if connection is alive
+and `ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT`
+to do full reset.
+
+If empty string, then sanity checking is disabled.
+
+==== server_lifetime ====
+
+Pooler tries to close server connections that have been connected
+longer than this.
+
+==== server_idle_timeout ====
+
+If server connection has been idle more than this then there's too many
+connections in the pool and this one can be dropped.
+
+==== server_connect_timeout ====
+
+If connection and login won't finish in this time, the connection will
+be closed.
+
+==== server_login_retry ====
+
+If login failed, because of failure from connect() or authentication,
+then pooler waits this much before retrying to connect.
+
+==== query_timeout ====
+
+Queries running longer than that are canceled. This should be used
+only with slightly smaller server-side statement_timeout, to apply only
+for network problems.
+
+Default: 0 (disabled)
+
+==== client_idle_timeout ====
+
+Client connections idling longer than that are closed.
+
+Default: 0 (disabled)
+
+=== Low-level network settings ===
+
+==== pkt_buf ====
+
+Internal buffer size for packets. Affects size of TCP packets sent
+and general memory usage. Actual libpq packets can be larger than this
+so no need to set it large.
+
+Default: 2048
+
+==== tcp_defer_accept ====
+
+Details about the following options can be found in `man 7 tcp`.
+
+Default: 45 on Linux, otherwise 0
+
+==== tcp_socket_buffer ====
+
+Default: not set
+
+==== tcp_keepalive ====
+
+Default: Not set
+
+==== tcp_keepcnt ====
+Default: not set
+
+==== tcp_keepidle ====
+Default: not set
+==== tcp_keepintvl ====
+Default: not set
+
+== Section [databases] ==
+
+This contains key=value pairs where key will be taken as database name and value as
+libpq-connstring style list of key=value pairs. As actual libpq is not used,
+not all features from libpq can be used (service=, quoting).
+
+=== dbname ===
+
+Destination database name.
+
+Default: same as client-side database name.
+
+=== host ===
+
+IP-address to connect to.
+
+Default: not set, meaning to use unix-socket.
+
+=== port ===
+
+Default: 5432
+
+=== user, password ===
+
+If user= is set, all connections to destination database will be done
+with that user, meaning that there will be only one pool for this database.
+
+Otherwise pgbouncer tries to log into destination database with client username,
+meaning that there will be one pool per user.
+
+=== client_encoding, datestyle ===
+
+As pgbouncer does not pass client startup packet to server, there is no way of specifying
+startup parameters to dest database. These parameters make it possible to set startup
+parameters in pgbouncer config. Especially, client_encoding=UNICODE is needed to work
+around JDBC driver bug.
--- /dev/null
+
+= PgBouncer =
+
+Lightweight connection pooler for PostgreSQL.
+
+Downloads, bugtracker, CVS: http://pgfoundry.org/projects/pgbouncer
+
+== Features ==
+
+ * Several levels of brutality when rotating connections:
+
+ Session pooling::
+ Most polite method. When client connects, a server connection
+ will be assigned to it for the whole duration it stays connected.
+ When client disconnects, the server connection will be put back
+ into pool.
+
+ Transaction pooling::
+ Server connection is assigned to client only during a transaction.
+ When PgBouncer notices that transaction is over, the server
+ will be put back into pool.
+
+ Statement pooling::
+ Most aggressive method. The server connection will be put back into
+ pool immediately after a query completes. Multi-statement
+ transactions are disallowed in this mode as they would break.
+
+ * Low memory requirements (2k per connection by default). This is due
+ to the fact that PgBouncer does not need to see full packet at once.
+
+ * It is not tied to one backend server, the destination databases can
+ reside on different hosts.
+
+ * Supports online reconfiguration for most of the settings.
+
+ * Supports online restart - is able to transfer sockets to new process.
+
+ * Supports protocol V3 only, so backend version must be >= 7.4.
+
+== Docs ==
+
+ * Detailed usage info: ./UsageInfo
+ * Config file help: ./ConfigFile
+ * TODO list: ./ToDo
--- /dev/null
+= PgBouncer TODO list =
+
+ * -R should detect that no pooler is running and boot normally
+ * -R should detect if login fails then exit()
+ * PAUSE <db>; RESUME <db>;
+
+ * keep stats about error counts?
+ * SHUTDOWN cmd should print notice?
+ * before loading users, disable all existing?
+
+ * log_connects, log_disconnects settings
+
+== Bugs ==
+
+ * Bouncer can get into situation where SUSPEND won't work (stalls),
+ thus making reboot impossible.
+
+ * Light load with small server_check_delay creates situation where
+ some clients may never get server connection. Applied a hack to
+ fix this, needs more analysis.
+
--- /dev/null
+#pragma section-numbers 2
+
+= PgBouncer usage details =
+
+[[TableOfContents]]
+
+== Building ==
+
+PgBouncer uses [http://monkey.org/~provos/libevent/ libevent]
+for low-level socket handling. When this is installed just run:
+
+{{{
+$ ./configure --prefix=/usr/local --with-libevent=/prefix
+$ make
+$ make install
+}}}
+
+== Command line usage ==
+{{{
+pgbouncer [-d][-R][-v] config.ini
+pgbouncer -V|-h
+}}}
+
+Where switches are:
+
+ -d::
+ Run in background. Without it the process will run in foreground.
+
+ -R::
+ Do an online restart. That means connecting to running process,
+ loading open sockets from it and using them.
+
+ -v::
+ Increase verbosity.
+
+ -V::
+ Show version.
+
+ -h::
+ Show short help.
+
+
+== Admin Console ==
+
+There is always an extra database available: "pgbouncer".
+When connected to it, it is possible to look at and change
+pooler settings.
+
+{{{
+SHOW STATS;
+}}}
+Shows statistics.
+
+{{{
+SHOW SERVERS;
+SHOW CLIENTS;
+SHOW POOLS;
+SHOW LISTS;
+}}}
+Shows internal info.
+
+{{{
+SHOW USERS;
+SHOW DATABASES;
+}}}
+Shows loaded users and databases.
+
+{{{
+SHOW FDS;
+}}}
+Shows list of fds in use. When the connected user has username
+"pgbouncer", connects thru unix socket and has same UID as running process
+the actual fds are passed over connection. This mechanism is used
+to do online restart.
+
+{{{
+PAUSE;
+}}}
+PgBouncer tries to disconnect from all servers, first waiting for
+all queries to complete. The command will not return before all is done.
+
+{{{
+SUSPEND;
+}}}
+
+All socket buffers are flushed and PgBouncer stops listening for data on them.
+The command will not return before all is done.
+
+{{{
+RESUME
+}}}
+
+Resume work from previous PAUSE or SUSPEND command.
+
+{{{
+SHUTDOWN
+}}}
+The PgBouncer process will exit.
+
+
+== Online restart ==
+
+PgBouncer supports restart without dropping connections. When launched
+with switch "-R", it will connect to running PgBouncer process via
+unix socket and issues commands:
+
+{{{
+SUSPEND;
+SHOW FDS;
+SHUTDOWN;
+}}}
+Then it waits until old process shuts down and then starts listening on
+acquired sockets.
+
+== Signals ==
+
+ SIGHUP::
+ Reload config.
+
+ SIGINT::
+ Safe shutdown.
+
+ SIGTERM::
+ Immediate shutdown.
+
+== libevent settings ==
+
+From libevent docs:
+
+{{{
+It is possible to disable support for epoll, kqueue, devpoll, poll or select
+by setting the environment variable EVENT_NOEPOLL, EVENT_NOKQUEUE, EVENT_NODEVPOLL,
+EVENT_NOPOLL or EVENT_NOSELECT, respectively. By setting the environment variable
+EVENT_SHOW_METHOD, libevent displays the kernel notification method that it uses.
+}}}
+
+== Authentication file format ==
+
+PgBouncer needs its own user database. The users are loaded from
+text file that should be in same format as PostgreSQL's pg_auth/pg_pwd
+file.
+
+{{{
+"username1" "password" ...
+"username2" "md12342345234" ...
+}}}
+
+There should be at least 2 fields, surrounded by double quotes. First
+is username and second either plain-text or md5-hashed password.
+PgBouncer ignores rest of the line.
+
+This file format allows pointing PgBouncer directly at the PostgreSQL
+user file under the data directory.
--- /dev/null
+;; database name = connect string
+[databases]
+
+; foodb over unix socket
+foodb =
+
+; redirect bardb to bazdb on localhost
+bardb = host=127.0.0.1 dbname=bazdb
+
+; access to dest database will go with single user
+forcedb = host=127.0.0.1 port=300 user=baz password=foo client_encoding=UNICODE datestyle=ISO
+
+;; Configuration section
+[pgbouncer]
+
+;;;
+;;; Administrative settings
+;;;
+
+logfile = pgbouncer.log
+pidfile = pgbouncer.pid
+
+;;;
+;;; Where to wait for clients
+;;;
+
+; ip address or * which means all ip-s
+listen_addr = 127.0.0.1
+listen_port = 6000
+unix_socket_dir = /tmp
+
+;;;
+;;; Authentication settings
+;;;
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+#auth_file = 8.0/main/global/pg_auth
+auth_file = etc/userlist.txt
+
+;;;
+;;; Users allowed into database 'pgbouncer'
+;;;
+
+; comma-separated list of users, who are allowed to change settings
+admin_users = user2, someadmin, otheradmin
+
+; comma-separated list of users who are just allowed to use SHOW command
+stats_users = stats, root
+
+;;;
+;;; Pooler personality questions
+;;;
+
+; When server connection is released back to pool:
+; session - after client disconnects
+; transaction - after transaction finishes
+; statement - after statement finishes
+pool_mode = session
+
+; When taking idle server into use, this query is run first.
+;
+; Query for session pooling:
+; ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT
+; Query for statement/transaction pooling:
+; SELECT 1
+; Empty query disables the functionality
+server_check_query = select 1
+
+; If server was used more recently than this many seconds ago,
+; skip the check query. If 0, the check query is always run.
+server_check_delay = 10
+
+;;;
+;;; Connection limits
+;;;
+
+; total number of clients that can connect
+max_client_conn = 100
+default_pool_size = 20
+
+;;;
+;;; Timeouts
+;;;
+
+;; Close server connection if it has been connected longer.
+;server_lifetime = 1200
+
+;; Close server connection if it has not been used in this time.
+;; Allows to clean unnecessary connections from pool after peak.
+;server_idle_timeout = 60
+
+;; Cancel connection attempt if server takes longer than this to answer.
+;server_connect_timeout = 15
+
+;; If server login failed (server_connect_timeout or auth failure)
+;; then wait this many seconds.
+;server_login_retry = 15
+
+;; Dangerous. Server connection is closed if query does not return
+;; in this time. Should be used to survive network problems,
+;; _not_ as statement_timeout. (default: 0)
+;query_timeout = 0
+
+;; Dangerous. Client connection is closed if no activity in this time.
+;; Should be used to survive network problems. (default: 0)
+;client_idle_timeout = 0
+
+
+;;;
+;;; Low-level tuning options
+;;;
+
+;; buffer for streaming packets
+;pkt_buf = 2048
+
+;; networking options, for info: man 7 tcp
+
+;; linux: notify program about new connection only if there
+;; is also data received. (Seconds to wait.)
+;; On Linux the default is 45, on other OS'es 0.
+;tcp_defer_accept = 0
+
+;; In-kernel buffer size (linux default: 4096)
+;tcp_socket_buffer = 0
+
+;; whether tcp keepalive should be turned on (0/1)
+;tcp_keepalive = 0
+
+;; following options are linux-specific.
+;; they also require tcp_keepalive=1
+
+;; count of keepalive packets
+;tcp_keepcnt = 0
+
+;; how long the connection can be idle,
+;; before sending keepalive packets
+;tcp_keepidle = 0
+
+;; The time between individual keepalive probes.
+;tcp_keepintvl = 0
+
--- /dev/null
+
+[databases]
+evtest = host=127.0.0.1
+provider = host=127.0.0.1
+postgres = host=127.0.0.1
+orderdb = host=127.0.0.1
+forcedb = host=127.0.0.1 port=300 user=baz password=foo client_encoding=UNICODE datestyle=ISO
+marko = host=127.0.0.1 port=5432 pool_size=5
+orderdb_test = host=192.168.125.155
+test_part = host=127.0.0.1
+
+[pgbouncer]
+logfile = pgbouncer.log
+;pidfile = pgbouncer.pid
+
+listen_addr = 127.0.0.1
+listen_port = 6000
+unix_socket_dir = /tmp
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+#auth_file = 8.0/main/global/pg_auth
+auth_file = etc/userlist.txt
+
+; session, transaction, statement
+pool_mode = session
+
+max_client_conn = 100
+default_pool_size = 20
+
--- /dev/null
+[databases]
+marko = host=127.0.0.1
+
+[pgbouncer]
+logfile = lib/pgbouncer.log
+pidfile = lib/pgbouncer.pid
+
+#listen_addr = 127.0.0.1
+listen_port = 6000
+unix_socket_dir = /tmp
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+auth_file = etc/test.users
+
+; When server connection is released back to pool:
+; session - after client disconnects
+; transaction - after transaction finishes
+; statement - after statement finishes
+pool_mode = transaction
+
+server_check_query = select 1
+server_check_delay = 10
+max_client_conn = 2000
+default_pool_size = 80
+
+admin_users = plproxy
+stats_users = marko
+
+stats_period = 60
+
--- /dev/null
+"admin" "" ""
+"backoffice" "" ""
+"info" "" ""
+"martinp" "md55c06ac8c93212495f8eaf6a7ffd688dd" ""
+"plproxy" "md5a704fc5c9a4bf2f745acc6f7a7ec2f2f" ""
+"postgres" "md5264abda62970ba635b133f545ce12132" ""
+"priitk" "md55c08f2e34592ddb13972db7eaadc1232" ""
+"replicator" "" ""
+"webstore" "" ""
+"wypbe" "md57e17e9c6cfde1c1f6f9155071d7d18a8" ""
+"wypfe" "md5e3b7c35f688032d97ab066210a33184b" ""
+"marko" "funky"
--- /dev/null
+"marko" "asdasd"
+"postgres" "asdasd"
+"pgbouncer" "fake"
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bouncer.h"
+
+#include <regex.h>
+
+/* regex elements */
+/* WS0: optional whitespace, WS1: at least one whitespace character */
+#define WS0 "[ \t\n\r]*"
+#define WS1 "[ \t\n\r]+"
+/* WORD: one capture group of digits, a-z and underscore */
+#define WORD "([0-9a-z_]+)"
+/* STRING: single-quoted text that may contain doubled quotes (''); two capture groups */
+#define STRING "'(([^']*|'')*)'"
+
+/* possible max + 1 */
+#define MAX_GROUPS 10
+
+/* group numbers */
+/* (capture-group indexes into the match array for each command regex) */
+#define SHOW_ARG 1
+#define SET_KEY 1
+#define SET_VAL 2
+#define SINGLECMD 1
+
+/* SHOW */
+/* the argument word is optional in this pattern */
+static const char cmd_show_rx[] =
+"^" WS0 "show" WS1 WORD "?" WS0 ";" WS0 "$";
+
+/* SET with simple value */
+static const char cmd_set_word_rx[] =
+"^" WS0 "set" WS1 WORD WS0 "=" WS0 WORD WS0 ";" WS0 "$";
+
+/* SET with quoted value */
+static const char cmd_set_str_rx[] =
+"^" WS0 "set" WS1 WORD WS0 "=" WS0 STRING WS0 ";" WS0 "$";
+
+/* single word cmd */
+/* note: unlike the patterns above, no whitespace is allowed before ';' */
+static const char cmd_single_rx[] =
+"^" WS0 WORD ";" WS0 "$";
+
+/* compiled regexes */
+static regex_t rc_show;
+static regex_t rc_set_word;
+static regex_t rc_set_str;
+static regex_t rc_single;
+
+/* NOTE(review): presumably the pool backing the admin console - confirm where it is assigned */
+static PgPool *admin_pool;
+
+/*
+ * Report an error from the admin console.
+ *
+ * The printf-style message is formatted into a 1024-byte buffer
+ * (vsnprintf truncates longer text), written to the log and, when a
+ * console client is given, also sent to it with send_pooler_error().
+ *
+ * Returns the result of send_pooler_error(), or true when admin is NULL.
+ */
+bool admin_error(PgSocket *admin, const char *fmt, ...)
+{
+	char msg[1024];
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(msg, sizeof(msg), fmt, args);
+	va_end(args);
+
+	log_error("%s", msg);
+
+	/* no client to notify: nothing more can fail */
+	if (!admin)
+		return true;
+	return send_pooler_error(admin, true, msg);
+}
+
+/*
+ * Finish a result set that was built up in buf: append CommandComplete
+ * (tagged with desc) and ReadyForQuery, then pass the buffer to
+ * pktbuf_send_queued() for delivery to the admin client.
+ */
+void admin_flush(PgSocket *admin, PktBuf *buf, const char *desc)
+{
+	pktbuf_write_CommandComplete(buf, desc);
+	pktbuf_write_ReadyForQuery(buf);
+	pktbuf_send_queued(buf, admin);
+}
+
+/*
+ * Send CommandComplete (tagged with desc) followed by ReadyForQuery to
+ * the admin client right away, using a 512-byte buffer on the stack.
+ *
+ * Returns the result of pktbuf_send_immidiate().
+ */
+bool admin_ready(PgSocket *admin, const char *desc)
+{
+	uint8 storage[512];
+	PktBuf pkt;
+
+	pktbuf_static(&pkt, storage, sizeof(storage));
+
+	pktbuf_write_CommandComplete(&pkt, desc);
+	pktbuf_write_ReadyForQuery(&pkt);
+
+	return pktbuf_send_immidiate(&pkt, admin);
+}
+
+/*
+ * Command: SET key = val;
+ *
+ * Only connections flagged as admin_user may change settings.  On
+ * success the client gets a "SET key=val" completion tag, otherwise an
+ * error message.
+ */
+static bool admin_set(PgSocket *admin, const char *key, const char *val)
+{
+	char tag[512];
+
+	if (!admin->admin_user)
+		return admin_error(admin, "admin access needed");
+
+	if (!set_config_param(bouncer_params, key, val, true, admin))
+		return admin_error(admin, "SET failed");
+
+	snprintf(tag, sizeof(tag), "SET %s=%s", key, val);
+	return admin_ready(admin, tag);
+}
+
+/*
+ * Send one DataRow with sendmsg(), optionally attaching a fd.
+ *
+ * The row is built into a local 1024-byte buffer with the column format
+ * "issssiqi" (fd, task, user, db, addr, port, ckey, link).  When the
+ * admin connection came over a unix socket and is flagged own_user, the
+ * descriptor itself is attached as SCM_RIGHTS ancillary data.
+ *
+ * Returns false if the row cannot be built, if sendmsg fails, or if the
+ * row was only partially sent; true otherwise.
+ */
+static bool send_one_fd(PgSocket *admin,
+			int fd, const char *task,
+			const char *user, const char *db,
+			const char *addr, int port,
+			uint64 ckey, int link)
+{
+	struct msghdr msg;
+	struct cmsghdr *cmsg;
+	int res;
+	struct iovec iovec;
+	uint8 pktbuf[1024];
+	uint8 cntbuf[CMSG_SPACE(sizeof(int))];
+
+	iovec.iov_base = pktbuf;
+	BUILD_DataRow(res, pktbuf, sizeof(pktbuf), "issssiqi",
+		      fd, task, user, db, addr, port, ckey, link);
+	if (res < 0)
+		return false;
+	iovec.iov_len = res;
+
+	/* sending fds */
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_iov = &iovec;
+	msg.msg_iovlen = 1;
+
+	/* attach a fd */
+	if (admin->addr.is_unix && admin->own_user) {
+		msg.msg_control = cntbuf;
+		msg.msg_controllen = sizeof(cntbuf);
+
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+
+		memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+		/* only the used part of the control buffer is sent */
+		msg.msg_controllen = cmsg->cmsg_len;
+	}
+
+	/*
+	 * msg_controllen is size_t or socklen_t depending on the platform,
+	 * so it is cast to match the %d conversion.
+	 */
+	slog_debug(admin, "sending socket list: fd=%d, len=%d",
+		   fd, (int)msg.msg_controllen);
+	res = safe_sendmsg(sbuf_socket(&admin->sbuf), &msg, 0);
+	if (res < 0) {
+		log_error("send_one_fd: sendmsg error: %s", strerror(errno));
+		return false;
+	} else if ((size_t)res != iovec.iov_len) {
+		/* res is non-negative here, so the cast is safe */
+		log_error("send_one_fd: partial sendmsg");
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Describe one client or server socket as a SHOW FDS row and pass it
+ * to send_one_fd().
+ *
+ * Missing user or pool is reported as NULL, a missing linked socket as
+ * fd 0.  The 8-byte cancel key is sent as one 64-bit number.
+ */
+static bool show_one_fd(PgSocket *admin, PgSocket *sk)
+{
+	PgAddr *peer = &sk->addr;
+	MBuf key_buf;
+	const char *task;
+	const char *user = NULL;
+	const char *db = NULL;
+	const char *host;
+	int link_fd = 0;
+
+	if (is_server_socket(sk))
+		task = "server";
+	else
+		task = "client";
+
+	if (sk->auth_user)
+		user = sk->auth_user->name;
+	if (sk->pool)
+		db = sk->pool->db->name;
+
+	if (peer->is_unix)
+		host = "unix";
+	else
+		host = inet_ntoa(peer->ip_addr);
+
+	if (sk->link)
+		link_fd = sbuf_socket(&sk->link->sbuf);
+
+	mbuf_init(&key_buf, sk->cancel_key, 8);
+
+	return send_one_fd(admin, sbuf_socket(&sk->sbuf), task, user, db,
+			   host, peer->port,
+			   mbuf_get_uint64(&key_buf), link_fd);
+}
+
+/*
+ * Send the SHOW FDS rows for the pooler's own listening sockets.
+ *
+ * A descriptor value of 0 from get_pooler_fds() is treated as "not
+ * listening" and skipped.  The unix socket is not attempted when
+ * sending the TCP socket row failed.
+ */
+static bool show_pooler_fds(PgSocket *admin)
+{
+	int net_fd, unix_fd;
+
+	get_pooler_fds(&net_fd, &unix_fd);
+
+	if (net_fd && !send_one_fd(admin, net_fd, "pooler", NULL, NULL,
+				   cf_listen_addr, cf_listen_port, 0, 0))
+		return false;
+
+	if (unix_fd)
+		return send_one_fd(admin, unix_fd, "pooler", NULL, NULL,
+				   "unix", cf_listen_port, 0, 0);
+
+	return true;
+}
+
+/*
+ * Send a SHOW FDS row for every socket on the given list.
+ * Stops at the first failure and returns false; returns true when all
+ * rows were sent (or the list was empty).
+ */
+static bool show_fds_from_list(PgSocket *admin, StatList *list)
+{
+	List *node;
+
+	statlist_for_each(node, list) {
+		if (!show_one_fd(admin, container_of(node, PgSocket, head)))
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Command: SHOW FDS
+ *
+ * Sends one row per pooler listening socket, per client still logging
+ * in and per client/server socket of every non-admin pool.  Rows go out
+ * through send_one_fd(), which also attaches the actual fd when the
+ * console connection is privileged.
+ */
+static bool admin_show_fds(PgSocket *admin)
+{
+	List *item;
+	PgPool *pool;
+	bool ok;
+
+	/*
+	 * Must not be available to everybody:
+	 * - the code flips the async option, which can lock the pooler
+	 * - cancel keys of all users are shown
+	 */
+	if (!admin->admin_user)
+		return admin_error(admin, "admin access needed");
+
+	/*
+	 * Sending this reliably in async manner is very hard,
+	 * so the socket is switched to blocking mode for this resultset.
+	 */
+	socket_set_nonblocking(sbuf_socket(&admin->sbuf), 0);
+
+	/* resultset header */
+	SEND_RowDescription(ok, admin, "issssiqi",
+			    "fd", "task",
+			    "user", "database",
+			    "addr", "port",
+			    "cancel", "link");
+
+	/* rows: each step runs only while everything before it succeeded */
+	ok = ok && show_pooler_fds(admin);
+	ok = ok && show_fds_from_list(admin, &login_client_list);
+
+	statlist_for_each(item, &pool_list) {
+		if (!ok)
+			break;
+
+		pool = container_of(item, PgPool, head);
+		if (pool->admin)
+			continue;
+
+		ok = show_fds_from_list(admin, &pool->active_client_list)
+		  && show_fds_from_list(admin, &pool->waiting_client_list)
+		  && show_fds_from_list(admin, &pool->active_server_list)
+		  && show_fds_from_list(admin, &pool->idle_server_list)
+		  && show_fds_from_list(admin, &pool->used_server_list)
+		  && show_fds_from_list(admin, &pool->tested_server_list)
+		  && show_fds_from_list(admin, &pool->new_server_list);
+	}
+
+	ok = ok && admin_ready(admin, "SHOW");
+
+	/* back to async mode, regardless of the outcome */
+	socket_set_nonblocking(sbuf_socket(&admin->sbuf), 1);
+
+	return ok;
+}
+
+/*
+ * Command: SHOW DATABASES
+ *
+ * One row per entry of database_list.  Always returns true; an
+ * allocation failure is only reported to the console as an error.
+ */
+static bool admin_show_databases(PgSocket *admin)
+{
+	List *item;
+	PgDatabase *db;
+	PktBuf *buf = pktbuf_dynamic(256);
+
+	if (!buf) {
+		admin_error(admin, "no mem");
+		return true;
+	}
+
+	pktbuf_write_RowDescription(buf, "ssissi",
+				    "name", "host", "port",
+				    "database", "force_user", "pool_size");
+
+	statlist_for_each(item, &database_list) {
+		char *host = NULL;
+		const char *f_user = NULL;
+
+		db = container_of(item, PgDatabase, head);
+
+		/* unix-socket databases have no host, shown as NULL */
+		if (!db->addr.is_unix)
+			host = inet_ntoa(db->addr.ip_addr);
+		if (db->forced_user)
+			f_user = db->forced_user->name;
+
+		pktbuf_write_DataRow(buf, "ssissi",
+				     db->name, host, db->addr.port,
+				     db->dbname, f_user,
+				     db->pool_size);
+	}
+
+	admin_flush(admin, buf, "SHOW");
+	return true;
+}
+
+
+/*
+ * Command: SHOW LISTS
+ *
+ * Reports the item counts of the global object lists, one
+ * (list, items) row each.  Always returns true; an allocation
+ * failure is only reported to the console as an error.
+ */
+static bool admin_show_lists(PgSocket *admin)
+{
+	PktBuf *buf = pktbuf_dynamic(256);
+	if (!buf) {
+		admin_error(admin, "no mem");
+		return true;
+	}
+	pktbuf_write_RowDescription(buf, "si", "list", "items");
+
+/* local shorthand for one (name, count) row */
+#define SENDLIST(name, size) pktbuf_write_DataRow(buf, "si", (name), (size))
+	SENDLIST("databases", statlist_count(&database_list));
+	SENDLIST("users", statlist_count(&user_list));
+	SENDLIST("pools", statlist_count(&pool_list));
+	SENDLIST("free_clients", statlist_count(&free_client_list));
+	SENDLIST("used_clients", get_active_client_count());
+	SENDLIST("login_clients", statlist_count(&login_client_list));
+	SENDLIST("free_servers", statlist_count(&free_server_list));
+	SENDLIST("used_servers", get_active_server_count());
+/* the macro refers to the local 'buf', so keep it from leaking out */
+#undef SENDLIST
+
+	admin_flush(admin, buf, "SHOW");
+	return true;
+}
+
+/*
+ * Command: SHOW USERS
+ *
+ * One single-column row with the name of every entry on user_list.
+ */
+static bool admin_show_users(PgSocket *admin)
+{
+	List *item;
+	PktBuf *buf;
+
+	buf = pktbuf_dynamic(256);
+	if (buf == NULL) {
+		admin_error(admin, "no mem");
+		return true;
+	}
+
+	pktbuf_write_RowDescription(buf, "s", "name");
+	statlist_for_each(item, &user_list)
+		pktbuf_write_DataRow(buf, "s",
+				     container_of(item, PgUser, head)->name);
+
+	admin_flush(admin, buf, "SHOW");
+	return true;
+}
+
+/*
+ * Helper for SHOW CLIENTS.
+ *
+ * Appends one row per client on 'list' to 'buf'; 'state' is the text
+ * put into the state column.  Row layout: user, database, state,
+ * addr, port, connect_time, request_time.
+ */
+static void show_client_list(PktBuf *buf, StatList *list, const char *state)
+{
+	List *item;
+
+	statlist_for_each(item, list) {
+		PgSocket *client = container_of(item, PgSocket, head);
+		const char *addr = "unix";
+
+		if (!client->addr.is_unix)
+			addr = inet_ntoa(client->addr.ip_addr);
+
+		pktbuf_write_DataRow(buf, "ssssiTT",
+				     client->auth_user->name,
+				     client->pool->db->name,
+				     state, addr, client->addr.port,
+				     client->connect_time,
+				     client->request_time);
+	}
+}
+
+/*
+ * Command: SHOW CLIENTS
+ *
+ * Lists the active and waiting clients of every pool.
+ */
+static bool admin_show_clients(PgSocket *admin)
+{
+	PgPool *pool;
+	List *item;
+	PktBuf *buf;
+
+	buf = pktbuf_dynamic(256);
+	if (buf == NULL) {
+		admin_error(admin, "no mem");
+		return true;
+	}
+
+	/* todo: age? query stats? */
+	pktbuf_write_RowDescription(buf, "ssssiTT",
+				    "user", "database", "state",
+				    "addr", "port", "connect_time", "request_time");
+
+	statlist_for_each(item, &pool_list) {
+		pool = container_of(item, PgPool, head);
+		show_client_list(buf, &pool->active_client_list, "active");
+		show_client_list(buf, &pool->waiting_client_list, "waiting");
+	}
+
+	admin_flush(admin, buf, "SHOW");
+	return true;
+}
+
+/*
+ * Helper for SHOW SERVERS.
+ *
+ * Appends one row per server connection on 'list' to 'buf'; 'state'
+ * is the text put into the state column.  Row layout: user, database,
+ * state, addr, port, connect_time, request_time.
+ */
+static void show_server_list(PktBuf *buf, StatList *list, const char *state)
+{
+	List *item;
+
+	statlist_for_each(item, list) {
+		PgSocket *server = container_of(item, PgSocket, head);
+		const char *addr = "unix";
+
+		if (!server->addr.is_unix)
+			addr = inet_ntoa(server->addr.ip_addr);
+
+		pktbuf_write_DataRow(buf, "ssssiTT",
+				     server->auth_user->name,
+				     server->pool->db->name,
+				     state, addr, server->addr.port,
+				     server->connect_time,
+				     server->request_time);
+	}
+}
+
+/*
+ * Command: SHOW SERVERS
+ *
+ * Lists the active, idle, used and tested server connections of
+ * every pool.  Always returns true; an allocation failure is only
+ * reported to the console as an error.
+ */
+static bool admin_show_servers(PgSocket *admin)
+{
+	List *item;
+	PgPool *pool;
+	PktBuf *buf;
+
+	buf = pktbuf_dynamic(256);
+	if (!buf) {
+		admin_error(admin, "no mem");
+		return true;
+	}
+
+	/*
+	 * Column names must follow the order in which show_server_list()
+	 * writes the values: user first, then database.
+	 */
+	pktbuf_write_RowDescription(buf, "ssssiTT",
+				    "user", "database", "state",
+				    "addr", "port", "connect_time", "request_time");
+	/* todo: age? query stats */
+
+	statlist_for_each(item, &pool_list) {
+		pool = container_of(item, PgPool, head);
+		show_server_list(buf, &pool->active_server_list, "active");
+		show_server_list(buf, &pool->idle_server_list, "idle");
+		show_server_list(buf, &pool->used_server_list, "used");
+		show_server_list(buf, &pool->tested_server_list, "tested");
+	}
+	admin_flush(admin, buf, "SHOW");
+	return true;
+}
+
+/*
+ * Command: SHOW POOLS
+ *
+ * One row per pool: database and user name followed by the sizes of
+ * its two client lists and five server lists.
+ */
+static bool admin_show_pools(PgSocket *admin)
+{
+	PgPool *pool;
+	List *item;
+	PktBuf *buf = pktbuf_dynamic(256);
+
+	if (buf == NULL) {
+		admin_error(admin, "no mem");
+		return true;
+	}
+
+	pktbuf_write_RowDescription(buf, "ssiiiiiii",
+				    "database", "user",
+				    "cl_active", "cl_waiting",
+				    "sv_active", "sv_idle",
+				    "sv_used", "sv_tested",
+				    "sv_login");
+
+	statlist_for_each(item, &pool_list) {
+		pool = container_of(item, PgPool, head);
+
+		pktbuf_write_DataRow(buf, "ssiiiiiii",
+				     pool->db->name, pool->user->name,
+				     /* clients */
+				     statlist_count(&pool->active_client_list),
+				     statlist_count(&pool->waiting_client_list),
+				     /* servers */
+				     statlist_count(&pool->active_server_list),
+				     statlist_count(&pool->idle_server_list),
+				     statlist_count(&pool->used_server_list),
+				     statlist_count(&pool->tested_server_list),
+				     statlist_count(&pool->new_server_list));
+	}
+
+	admin_flush(admin, buf, "SHOW");
+	return true;
+}
+
+/*
+ * Command: SHOW CONFIG
+ *
+ * One (key, value, changeable) row per entry of bouncer_params;
+ * the table ends at the first entry without a name.
+ */
+static bool admin_show_config(PgSocket *admin)
+{
+	ConfElem *cf;
+	PktBuf *buf = pktbuf_dynamic(256);
+
+	if (buf == NULL) {
+		admin_error(admin, "no mem");
+		return true;
+	}
+
+	pktbuf_write_RowDescription(buf, "sss", "key", "value", "changeable");
+
+	for (cf = &bouncer_params[0]; cf->name; cf++) {
+		pktbuf_write_DataRow(buf, "sss",
+				     cf->name, conf_to_text(cf),
+				     cf->reloadable ? "yes" : "no");
+	}
+
+	admin_flush(admin, buf, "SHOW");
+	return true;
+}
+
+/*
+ * Command: RELOAD
+ *
+ * Admin-only: re-reads the configuration via load_config(true).
+ */
+static bool admin_cmd_reload(PgSocket *admin)
+{
+	if (admin->admin_user) {
+		log_info("RELOAD command issued");
+		load_config(true);
+		return admin_ready(admin, "RELOAD");
+	}
+	return admin_error(admin, "admin access needed");
+}
+
+/*
+ * Command: SHUTDOWN
+ *
+ * Admin-only: terminates the process with exit(0).  No reply is sent
+ * to the console before exiting.
+ */
+static bool admin_cmd_shutdown(PgSocket *admin)
+{
+	if (admin->admin_user) {
+		log_info("SHUTDOWN command issued");
+		exit(0);
+	}
+	return admin_error(admin, "admin access needed");
+}
+
+/*
+ * Command: RESUME
+ *
+ * Admin-only: leaves PAUSE (mode 1) or SUSPEND (mode 2).  The pause
+ * mode is cleared in every case; only when coming out of SUSPEND is
+ * resume_all() called.  An error is returned if the pooler was in
+ * neither mode.
+ */
+static bool admin_cmd_resume(PgSocket *admin)
+{
+	int prev_mode = cf_pause_mode;
+
+	if (!admin->admin_user)
+		return admin_error(admin, "admin access needed");
+
+	log_info("RESUME command issued");
+	cf_pause_mode = 0;
+
+	if (prev_mode != 1 && prev_mode != 2)
+		return admin_error(admin, "Pooler is not paused/suspended");
+
+	if (prev_mode == 2)
+		resume_all();
+	return admin_ready(admin, "RESUME");
+}
+
+/*
+ * Command: SUSPEND
+ *
+ * Admin-only: enters pause mode 2 and calls suspend_pooler().  No
+ * reply is sent here; the console is flagged with wait_for_response
+ * and answered from admin_pause_done().
+ */
+static bool admin_cmd_suspend(PgSocket *admin)
+{
+	if (!admin->admin_user)
+		return admin_error(admin, "admin access needed");
+	else if (cf_pause_mode != 0)
+		return admin_error(admin, "already suspended/paused");
+
+	log_info("SUSPEND command issued");
+
+	cf_pause_mode = 2;
+	admin->wait_for_response = 1;
+	suspend_pooler();
+	return true;
+}
+
+/*
+ * Command: PAUSE
+ *
+ * Admin-only: enters pause mode 1.  No reply is sent here; the
+ * console is flagged with wait_for_response and answered from
+ * admin_pause_done().
+ */
+static bool admin_cmd_pause(PgSocket *admin)
+{
+	if (!admin->admin_user)
+		return admin_error(admin, "admin access needed");
+	else if (cf_pause_mode != 0)
+		return admin_error(admin, "already suspended/paused");
+
+	log_info("PAUSE command issued");
+
+	cf_pause_mode = 1;
+	admin->wait_for_response = 1;
+	return true;
+}
+
+/*
+ * Extract substring from regex group.
+ *
+ * Copies the text matched by group 'gnum' of 'glist' out of 'src' into
+ * 'dst', a buffer of 'dstmax' bytes, and NUL-terminates it.  'dst'
+ * becomes an empty string when the group did not take part in the
+ * match or when the text (plus terminator) does not fit.  Nothing at
+ * all is written when 'dstmax' is not positive.
+ */
+static void copy_arg(const char *src, regmatch_t *glist,
+		     int gnum, char *dst, int dstmax)
+{
+	const regmatch_t *g = &glist[gnum];
+	int len = 0;
+
+	/* no room even for the terminator */
+	if (dstmax <= 0)
+		return;
+
+	/* groups that did not match carry offsets of -1 */
+	if (g->rm_so >= 0 && g->rm_eo > g->rm_so)
+		len = (int)(g->rm_eo - g->rm_so);
+
+	/* too long: hand back an empty string instead of truncating */
+	if (len >= dstmax)
+		len = 0;
+
+	if (len > 0)
+		memcpy(dst, src + g->rm_so, len);
+	dst[len] = 0;
+}
+
+/*
+ * Extract quoted substring from regex group.
+ *
+ * Like copy_arg(), but every doubled single quote ('') inside the
+ * group is collapsed into one quote while copying.  'dst' becomes an
+ * empty string when the group did not take part in the match or when
+ * the raw text (plus terminator) does not fit into 'dstmax' bytes.
+ * Nothing at all is written when 'dstmax' is not positive.
+ */
+static void copy_arg_unquote(const char *str, regmatch_t *glist,
+			     int gnum, char *dst, int dstmax)
+{
+	const regmatch_t *g = &glist[gnum];
+	const char *src, *end;
+
+	/* no room even for the terminator */
+	if (dstmax <= 0)
+		return;
+
+	/* unmatched group (offsets are -1) or text too long */
+	if (g->rm_so < 0 || g->rm_eo < g->rm_so
+	    || g->rm_eo - g->rm_so >= dstmax)
+	{
+		*dst = 0;
+		return;
+	}
+
+	src = str + g->rm_so;
+	end = str + g->rm_eo;
+
+	/* output is never longer than the input, so it fits */
+	while (src < end) {
+		/* look at src[1] only while it is still inside the group */
+		if (src[0] == '\'' && src + 1 < end && src[1] == '\'')
+			src++;
+		*dst++ = *src++;
+	}
+	*dst = 0;
+}
+
+/*
+ * Command: SHOW HELP
+ *
+ * Sends the console usage text as a NOTICE packet, followed by the
+ * normal command completion.  The SHOW list names every argument that
+ * admin_parse_query() accepts.
+ */
+static bool admin_show_help(PgSocket *admin)
+{
+	bool res;
+	SEND_generic(res, admin, 'N',
+		"sssss",
+		"SNOTICE", "C00000", "MConsole usage",
+		"D\n\tSHOW [HELP|CONFIG|DATABASES|USERS|FDS"
+		"|POOLS|CLIENTS|SERVERS|LISTS|STATS|VERSION]\n"
+		"\tSET key = arg\n"
+		"\tRELOAD\n"
+		"\tPAUSE\n"
+		"\tSUSPEND\n"
+		"\tRESUME\n"
+		"\tSHUTDOWN", "");
+	if (res)
+		res = admin_ready(admin, "SHOW");
+	return res;
+}
+
+/*
+ * Command: SHOW VERSION
+ *
+ * Sends the version string as a NOTICE packet, followed by the
+ * normal command completion.
+ */
+static bool admin_show_version(PgSocket *admin)
+{
+	bool sent;
+
+	SEND_generic(sent, admin, 'N',
+		     "ssss", "SNOTICE", "C00000",
+		     "MPgBouncer version " PACKAGE_VERSION, "");
+	if (!sent)
+		return sent;
+	return admin_ready(admin, "SHOW");
+}
+
+/*
+ * Handle one console query.
+ *
+ * The query text is tried against the compiled regexes in this order:
+ * SHOW <arg>, SET key = 'quoted value', SET key = word, single-word
+ * command.  The first one that matches decides which handler runs;
+ * names are compared case-insensitively.  If nothing matches, or the
+ * argument is not known, an error is sent to the console.
+ *
+ * Returns the handler's result.  On a false result the console client
+ * is disconnected here before returning.
+ */
+static bool admin_parse_query(PgSocket *admin, const char *q)
+{
+ regmatch_t grp[MAX_GROUPS];
+ char key[64]; /* SHOW argument, SET key or command word */
+ char val[256]; /* SET value */
+ bool res = true;
+
+ if (regexec(&rc_show, q, MAX_GROUPS, grp, 0) == 0) {
+ copy_arg(q, grp, SHOW_ARG, key, sizeof(key));
+ if (strcasecmp(key, "help") == 0) {
+ res = admin_show_help(admin);
+ } else if (strcasecmp(key, "stats") == 0) {
+ res = admin_database_stats(admin, &pool_list);
+ } else if (strcasecmp(key, "config") == 0) {
+ res = admin_show_config(admin);
+ } else if (strcasecmp(key, "databases") == 0) {
+ res = admin_show_databases(admin);
+ } else if (strcasecmp(key, "users") == 0) {
+ res = admin_show_users(admin);
+ } else if (strcasecmp(key, "pools") == 0) {
+ res = admin_show_pools(admin);
+ } else if (strcasecmp(key, "clients") == 0) {
+ res = admin_show_clients(admin);
+ } else if (strcasecmp(key, "servers") == 0) {
+ res = admin_show_servers(admin);
+ } else if (strcasecmp(key, "lists") == 0) {
+ res = admin_show_lists(admin);
+ } else if (strcasecmp(key, "fds") == 0) {
+ res = admin_show_fds(admin);
+ } else if (strcasecmp(key, "version") == 0) {
+ res = admin_show_version(admin);
+ } else
+ res = admin_error(admin, "bad SHOW arg, use SHOW HELP");
+ } else if (regexec(&rc_set_str, q, MAX_GROUPS, grp, 0) == 0) {
+ copy_arg(q, grp, SET_KEY, key, sizeof(key));
+ copy_arg_unquote(q, grp, SET_VAL, val, sizeof(val)); /* collapses '' into ' */
+ if (!key[0] || !val[0]) { /* empty also means "did not fit into the buffer" */
+ res = admin_error(admin, "bad arguments");
+ } else
+ res = admin_set(admin, key, val);
+ } else if (regexec(&rc_set_word, q, MAX_GROUPS, grp, 0) == 0) {
+ copy_arg(q, grp, SET_KEY, key, sizeof(key));
+ copy_arg(q, grp, SET_VAL, val, sizeof(val));
+ if (!key[0] || !val[0]) { /* empty also means "did not fit into the buffer" */
+ res = admin_error(admin, "bad arguments");
+ } else
+ res = admin_set(admin, key, val);
+ } else if (regexec(&rc_single, q, MAX_GROUPS, grp, 0) == 0) {
+ copy_arg(q, grp, SINGLECMD, key, sizeof(key));
+ if (strcasecmp(key, "SHUTDOWN") == 0)
+ res = admin_cmd_shutdown(admin);
+ else if (strcasecmp(key, "SUSPEND") == 0)
+ res = admin_cmd_suspend(admin);
+ else if (strcasecmp(key, "PAUSE") == 0)
+ res = admin_cmd_pause(admin);
+ else if (strcasecmp(key, "RESUME") == 0)
+ res = admin_cmd_resume(admin);
+ else if (strcasecmp(key, "RELOAD") == 0)
+ res = admin_cmd_reload(admin);
+ else
+ res = admin_error(admin, "unknown command: %s", q);
+ } else
+ res = admin_error(admin, "unknown cmd: %s", q);
+
+ if (!res) /* handler failed: drop the console connection */
+ disconnect_client(admin, true, "failure");
+ return res;
+}
+
+/*
+ * Handle one packet from a console client.
+ *
+ * Only 'Q' (query) and 'X' (close request) packets are accepted; any
+ * other type, or a packet that is not fully buffered, gets the client
+ * disconnected.  Returns true only when a query was handled
+ * successfully, in which case the packet is scheduled to be skipped.
+ */
+bool admin_handle_client(PgSocket *admin, MBuf *pkt, int pkt_type, int pkt_len)
+{
+	const char *query;
+	bool ok;
+
+	/* dont tolerate partial packets */
+	if (mbuf_avail(pkt) < pkt_len - 5) {
+		disconnect_client(admin, true, "incomplete pkt");
+		return false;
+	}
+
+	if (pkt_type == 'X') {
+		disconnect_client(admin, false, "close req");
+		return false;
+	}
+
+	if (pkt_type != 'Q') {
+		admin_error(admin, "unsupported pkt type: %d", pkt_type);
+		disconnect_client(admin, true, "bad pkt");
+		return false;
+	}
+
+	query = mbuf_get_string(pkt);
+	if (query == NULL) {
+		disconnect_client(admin, true, "incomplete query");
+		return false;
+	}
+
+	log_debug("got admin query: %s", query);
+	ok = admin_parse_query(admin, query);
+	if (ok)
+		sbuf_prepare_skip(&admin->sbuf, pkt_len);
+	return ok;
+}
+
+/**
+ * Access check for a not yet authenticated client that connects to
+ * the console database.
+ *
+ * Sets client->admin_user / client->own_user as appropriate:
+ * - unix-socket peer with the pooler's own uid, logging in as
+ *   "pgbouncer": both flags set
+ * - user listed in cf_admin_users: admin_user set
+ * - user listed in cf_stats_users: allowed, no flags
+ * Anybody else is disconnected and false is returned.
+ */
+bool admin_pre_login(PgSocket *client)
+{
+	const char *username = client->auth_user->name;
+	uid_t peer_uid = 0;
+
+	client->admin_user = 0;
+	client->own_user = 0;
+
+	/* tag same uid as special */
+	if (client->addr.is_unix
+	    && get_unix_peer_uid(sbuf_socket(&client->sbuf), &peer_uid)
+	    && peer_uid == getuid()
+	    && strcmp("pgbouncer", username) == 0)
+	{
+		client->own_user = 1;
+		client->admin_user = 1;
+		slog_info(client, "pgbouncer access from unix socket");
+		return true;
+	}
+
+	if (strlist_contains(cf_admin_users, username)) {
+		client->admin_user = 1;
+		return true;
+	}
+	if (strlist_contains(cf_stats_users, username))
+		return true;
+
+	disconnect_client(client, true, "not allowed");
+	return false;
+}
+
+/*
+ * Init special database and query parsing.
+ *
+ * Registers the fake "pgbouncer" database, its pool (remembered in
+ * admin_pool and tagged with pool->admin) and the "pgbouncer" user,
+ * fills in the canned welcome message and startup parameters of that
+ * database, and compiles the console command regexes.  Any failure
+ * here ends in fatal().
+ */
+void admin_setup(void)
+{
+ PgDatabase *db;
+ PgPool *pool;
+ PgUser *user;
+ PktBuf msg;
+ int res;
+
+ /* fake database */
+ db = add_database("pgbouncer");
+ if (!db)
+ fatal("no mem for admin database");
+
+ db->addr.port = cf_listen_port;
+ db->addr.is_unix = 1;
+ db->pool_size = 2;
+ force_user(db, "pgbouncer", "");
+
+ /* fake pool, tag it as special */
+ pool = get_pool(db, db->forced_user);
+ if (!pool)
+ fatal("cannot create admin pool?");
+ pool->admin = 1;
+ admin_pool = pool;
+
+ /* fake user, with disabled psw */
+ user = add_user("pgbouncer", "");
+ if (!user)
+ fatal("cannot create admin user?");
+ create_auth_cache();
+
+ /* prepare welcome */
+ pktbuf_static(&msg, db->welcome_msg, sizeof(db->welcome_msg));
+ pktbuf_write_AuthenticationOk(&msg);
+ pktbuf_write_ParameterStatus(&msg, "server_version", "8.0/bouncer");
+ pktbuf_write_ParameterStatus(&msg, "client_encoding", "UNICODE");
+ pktbuf_write_ParameterStatus(&msg, "server_encoding", "UNICODE");
+ pktbuf_write_ParameterStatus(&msg, "is_superuser", "on");
+
+ db->welcome_msg_len = pktbuf_written(&msg);
+ db->welcome_msg_ready = 1;
+
+ pktbuf_static(&msg, db->startup_params, sizeof(db->startup_params)); /* msg now writes into startup_params */
+ pktbuf_put_string(&msg, "database");
+ db->dbname = (char *)db->startup_params + pktbuf_written(&msg); /* points at the value written by the next call */
+ pktbuf_put_string(&msg, "pgbouncer");
+ db->startup_params_len = pktbuf_written(&msg);
+
+ /* initialize regexes */
+ res = regcomp(&rc_show, cmd_show_rx, REG_EXTENDED | REG_ICASE);
+ if (res != 0)
+ fatal("cmd show regex compilation error");
+ res = regcomp(&rc_set_word, cmd_set_word_rx, REG_EXTENDED | REG_ICASE);
+ if (res != 0)
+ fatal("set/word regex compilation error");
+ res = regcomp(&rc_set_str, cmd_set_str_rx, REG_EXTENDED | REG_ICASE);
+ if (res != 0)
+ fatal("set/str regex compilation error");
+ res = regcomp(&rc_single, cmd_single_rx, REG_EXTENDED | REG_ICASE);
+ if (res != 0)
+ fatal("singleword regex compilation error");
+}
+
+/*
+ * Answer the console clients that are waiting for a PAUSE or SUSPEND
+ * to complete.
+ *
+ * Every client of the admin pool flagged with wait_for_response gets
+ * the command completion matching the current pause mode.  If no
+ * console client is left while in SUSPEND mode, the pooler resumes
+ * by itself.
+ */
+void admin_pause_done(void)
+{
+	List *item, *tmp;
+	PgSocket *admin;
+
+	statlist_for_each_safe(item, &admin_pool->active_client_list, tmp) {
+		admin = container_of(item, PgSocket, head);
+		if (admin->wait_for_response) {
+			if (cf_pause_mode == 1)
+				admin_ready(admin, "PAUSE");
+			else if (cf_pause_mode == 2)
+				admin_ready(admin, "SUSPEND");
+			else
+				fatal("admin_pause_done: bad state");
+
+			admin->wait_for_response = 0;
+		}
+	}
+
+	if (cf_pause_mode == 2
+	    && statlist_empty(&admin_pool->active_client_list))
+	{
+		log_info("Admin disappeared when suspended, doing RESUME");
+		cf_pause_mode = 0;
+		resume_all();
+	}
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+bool admin_handle_client(PgSocket *client, MBuf *pkt, int pkt_type, int pkt_len); /* process one packet of a console client; false when the client was disconnected */
+bool admin_pre_login(PgSocket *client); /* access check for a login to the console database; sets admin_user/own_user */
+void admin_setup(void); /* create the fake "pgbouncer" db/pool/user and compile the command regexes */
+bool admin_error(PgSocket *console, const char *fmt, ...); /* printf-style error reply to a console client */
+void admin_pause_done(void); /* answer console clients waiting for PAUSE/SUSPEND completion */
+void admin_flush(PgSocket *admin, PktBuf *buf, const char *desc); /* send a prepared resultset; presumably also appends the completion for 'desc' - confirm */
+bool admin_ready(PgSocket *admin, const char *desc); /* send the command completion for 'desc' */
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * core structures
+ */
+
+#include "system.h"
+
+#include <event.h>
+
+/*
+ * Each state corresponds to a list; the list is named in the comment
+ * next to the state.  CL_* are client states, SV_* are server states.
+ */
+enum SocketState {
+ CL_FREE, /* free_client_list */
+ CL_LOGIN, /* login_client_list */
+ CL_WAITING, /* pool->waiting_client_list */
+ CL_ACTIVE, /* pool->active_client_list */
+ CL_CANCEL, /* pool->cancel_req_list */
+
+ SV_FREE, /* free_server_list */
+ SV_LOGIN, /* pool->new_server_list */
+ SV_IDLE, /* pool->idle_server_list */
+ SV_ACTIVE, /* pool->active_server_list */
+ SV_USED, /* pool->used_server_list */
+ SV_TESTED /* pool->tested_server_list */
+};
+
+#define is_server_socket(sk) ((sk)->state >= SV_FREE) /* relies on every SV_* state being declared after the CL_* ones */
+
+
+typedef struct PgSocket PgSocket; /* short names for the core types, declared before the project headers below are included */
+typedef struct PgUser PgUser;
+typedef struct PgDatabase PgDatabase;
+typedef struct PgPool PgPool;
+typedef struct PgStats PgStats;
+typedef struct PgAddr PgAddr;
+typedef enum SocketState SocketState;
+
+#include "util.h"
+#include "list.h"
+#include "mbuf.h"
+#include "sbuf.h"
+#include "pktbuf.h"
+
+#include "admin.h"
+#include "loader.h"
+#include "client.h"
+#include "server.h"
+#include "pooler.h"
+#include "proto.h"
+#include "objects.h"
+#include "stats.h"
+#include "takeover.h"
+#include "janitor.h"
+
+/* sizes of the fixed name/password buffers; static buffers are used to avoid allocations */
+#define MAX_DBNAME 64
+#define MAX_USERNAME 64
+#define MAX_PASSWORD 64
+
+/* auth modes, should match PG's */
+#define AUTH_ANY -1 /* same as trust but without username check */
+#define AUTH_TRUST 0
+#define AUTH_PLAIN 3
+#define AUTH_CRYPT 4
+#define AUTH_MD5 5
+#define AUTH_CREDS 6
+
+/* type codes for weird pkts: packets that have no type byte and are recognized by the code in their body */
+#define PKT_STARTUP 0x30000 /* protocol version 3.0 */
+#define PKT_SSLREQ 80877103 /* SSLRequest code */
+#define PKT_CANCEL 80877102 /* CancelRequest code */
+
+#define POOL_SESSION 0 /* pooling modes - presumably the values of cf_pool_mode; confirm */
+#define POOL_TX 1
+#define POOL_STMT 2
+
+struct PgAddr { /* endpoint address of a socket or database */
+ struct in_addr ip_addr; /* IPv4 address; formatted with inet_ntoa() for display */
+ unsigned short port;
+ unsigned is_unix:1; /* unix-socket endpoint; shown as "unix" instead of ip_addr */
+};
+
+struct PgStats { /* counters kept per pool (see PgPool.stats) */
+ uint64 request_count;
+ uint64 server_bytes;
+ uint64 client_bytes;
+ usec_t query_time; /* total req time in us */
+};
+
+/*
+ * Contains connections for one db/user combo.
+ *
+ * Each StatList below holds the sockets that are in the SocketState
+ * of the same name.
+ */
+struct PgPool {
+ List head; /* all pools */
+ List map_head; /* pools for specific client/db */
+
+ /* pool contains connection into 'db' under 'user' */
+ PgDatabase * db;
+ PgUser * user;
+
+ /* logged-in clients (CL_ACTIVE) */
+ StatList active_client_list;
+ /* client waits for a server to be available */
+ StatList waiting_client_list;
+ /* closed client connections with server key */
+ StatList cancel_req_list;
+
+ /* servers linked with clients */
+ StatList active_server_list;
+ /* servers ready to be linked with clients */
+ StatList idle_server_list;
+ /* server just unlinked from clients */
+ StatList used_server_list;
+ /* server in testing process */
+ StatList tested_server_list;
+ /* servers in login phase */
+ StatList new_server_list;
+
+ /* stats */
+ PgStats stats;
+ PgStats newer_stats; /* presumably snapshots used for per-period averages - confirm in stats.c */
+ PgStats older_stats;
+
+ /* if last connect failed, there should be delay before next */
+ usec_t last_connect_time;
+ unsigned last_connect_failed:1;
+ unsigned admin:1; /* set only on the internal console pool created by admin_setup() */
+};
+
+#define pool_server_count(pool) ( \
+ statlist_count(&(pool)->active_server_list) + \
+ statlist_count(&(pool)->idle_server_list) + \
+ statlist_count(&(pool)->new_server_list) + \
+ statlist_count(&(pool)->tested_server_list) + \
+ statlist_count(&(pool)->used_server_list)) /* total of all five server lists of a pool */
+
+#define pool_client_count(pool) ( \
+ statlist_count(&(pool)->active_client_list) + \
+ statlist_count(&(pool)->waiting_client_list)) /* active + waiting clients; cancel_req_list is not counted */
+
+struct PgUser { /* a known user: login name plus stored password */
+ List head; /* list header (users are kept on user_list) */
+ List pool_list;
+ char name[MAX_USERNAME];
+ char passwd[MAX_PASSWORD]; /* plain text or MD5 form; an empty string never passes the password check */
+};
+
+struct PgDatabase { /* one configured database entry */
+ List head; /* list header (databases are kept on database_list) */
+ char name[MAX_DBNAME]; /* name clients connect to */
+
+ /* database info to be sent to client */
+ uint8 welcome_msg[512];
+ unsigned welcome_msg_len;
+ unsigned welcome_msg_ready:1;
+
+ /* key/val pairs (without user) for startup msg to be sent to server */
+ uint8 startup_params[256];
+ unsigned startup_params_len;
+
+ /* if not NULL, the user/psw is forced */
+ PgUser * forced_user;
+
+ /* address prepared for connect() */
+ PgAddr addr;
+
+ /* max server connections in one pool */
+ int pool_size;
+
+ /* info fields, pointer to inside startup_params */
+ const char * dbname;
+};
+
+struct PgSocket { /* one connection, either client or server side (see is_server_socket) */
+ List head; /* list header */
+ PgSocket * link; /* the dest of packets */
+ PgPool * pool; /* parent pool, if NULL not yet assigned */
+
+ SocketState state;
+
+ unsigned wait_for_welcome:1; /* no server yet in pool */
+ unsigned ready:1; /* server accepts new query */
+ unsigned flush_req:1; /* client requested flush */
+ unsigned admin_user:1; /* console client allowed to run privileged commands */
+ unsigned own_user:1; /* is console client with same uid */
+
+ /* if the socket is suspended */
+ unsigned suspended:1;
+
+ /* admin conn, waits for completion of PAUSE/SUSPEND cmd */
+ unsigned wait_for_response:1;
+ /* this (server) socket must be closed ASAP */
+ unsigned close_needed:1;
+
+ usec_t connect_time; /* when connection was made */
+ usec_t request_time; /* last activity time */
+ usec_t query_start; /* query start moment */
+
+ char salt[4]; /* salt sent in the auth request (2 bytes used for crypt, 4 for md5) */
+ uint8 cancel_key[8];
+ PgUser * auth_user; /* user the client logged in as */
+ PgAddr addr;
+
+ SBuf sbuf; /* stream buffer, must be last */
+};
+
+/*
+ * Where to store old fd info during SHOW FDS result processing:
+ * existing PgSocket fields are reused under these alias names.
+ */
+#define tmp_sk_oldfd request_time
+#define tmp_sk_linkfd query_start
+/* takeover_clean_socket() needs to clean those up */
+
+/* main.c: configuration values and runtime flags */
+extern int cf_verbose;
+extern int cf_daemon;
+
+extern char *cf_unix_socket_dir;
+extern char *cf_listen_addr;
+extern int cf_listen_port;
+
+extern int cf_pool_mode;
+extern int cf_max_client_conn;
+extern int cf_default_pool_size;
+
+extern usec_t cf_server_lifetime;
+extern usec_t cf_server_idle_timeout;
+extern char * cf_server_check_query;
+extern usec_t cf_server_check_delay;
+extern usec_t cf_server_connect_timeout;
+extern usec_t cf_server_login_retry;
+extern usec_t cf_query_timeout;
+extern usec_t cf_client_idle_timeout;
+
+extern int cf_auth_type; /* one of the AUTH_* codes */
+extern char *cf_auth_file;
+
+extern char *cf_logfile;
+extern char *cf_pidfile;
+
+extern char *cf_admin_users; /* user names allowed on the console with admin rights */
+extern char *cf_stats_users; /* user names allowed on the console without admin rights */
+extern int cf_stats_period;
+
+extern int cf_pause_mode; /* 0 = running, 1 = PAUSE, 2 = SUSPEND */
+extern int cf_shutdown;
+extern int cf_reboot;
+
+extern int cf_sbuf_len;
+extern int cf_tcp_keepalive;
+extern int cf_tcp_keepcnt;
+extern int cf_tcp_keepidle;
+extern int cf_tcp_keepintvl;
+extern int cf_tcp_socket_buffer;
+extern int cf_tcp_defer_accept;
+
+extern ConfElem bouncer_params[]; /* config table; ends with an entry whose name is NULL */
+
+
+/* Take the next socket off 'slist'; NULL when statlist_pop() gives nothing */
+static inline PgSocket *
+pop_socket(StatList *slist)
+{
+	List *item = statlist_pop(slist);
+
+	return item ? container_of(item, PgSocket, head) : NULL;
+}
+
+/* Peek at the first socket of 'slist' without removing it; NULL if empty */
+static inline PgSocket *
+first_socket(StatList *slist)
+{
+	if (!statlist_empty(slist)) {
+		log_debug("first_socket: next=%p", slist->head.next);
+		return container_of(slist->head.next, PgSocket, head);
+	}
+
+	log_debug("first_socket: statlist_empty");
+	return NULL;
+}
+
+void load_config(bool reload); /* (re)read the configuration; the RELOAD console command calls it with reload = true */
+
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Client connection handling
+ */
+
+#include "bouncer.h"
+
+/*
+ * Check the password sent by a client against the stored one of
+ * client->auth_user, according to cf_auth_type.
+ *
+ * Empty passwords (sent or stored) never match.  For md5 a stored
+ * plain-text password is converted to its MD5 form in place first.
+ * Returns false for auth types that use no password.
+ */
+static bool check_client_passwd(PgSocket *client, const char *passwd)
+{
+	PgUser *user = client->auth_user;
+	char md5[MD5_PASSWD_LEN + 1];
+	const char *expected;
+
+	/* disallow empty passwords */
+	if (passwd[0] == 0 || user->passwd[0] == 0)
+		return false;
+
+	if (cf_auth_type == AUTH_PLAIN)
+		return strcmp(user->passwd, passwd) == 0;
+
+	if (cf_auth_type == AUTH_CRYPT) {
+		expected = pg_crypt(user->passwd, (char *)client->salt);
+		return strcmp(expected, passwd) == 0;
+	}
+
+	if (cf_auth_type == AUTH_MD5) {
+		if (strlen(passwd) != MD5_PASSWD_LEN)
+			return false;
+
+		/* first stage: hash with user name, result replaces the stored password */
+		if (!isMD5(user->passwd))
+			pg_md5_encrypt(user->passwd, user->name, strlen(user->name), user->passwd);
+
+		/* second stage: hash (minus the 3-char prefix) with the session salt */
+		pg_md5_encrypt(user->passwd + 3, client->salt, 4, md5);
+		return strcmp(md5, passwd) == 0;
+	}
+
+	return false;
+}
+
+/*
+ * Attach a client to the pool for the given database and user.
+ *
+ * Sets client->auth_user (the user the client logs in as) and
+ * client->pool.  With auth_type=any the requested user name is
+ * ignored and the database's forced user is used.  If the database
+ * has a forced user, the pool is always the one of that user.
+ * On any failure the client is disconnected and false is returned.
+ */
+bool
+set_pool(PgSocket *client, const char *dbname, const char *username)
+{
+	PgDatabase *db;
+	PgUser *login_user;
+	PgUser *pool_user;
+
+	/* find database */
+	db = find_database(dbname);
+	if (db == NULL) {
+		disconnect_client(client, true, "No such database");
+		return false;
+	}
+
+	/* find user */
+	if (cf_auth_type != AUTH_ANY) {
+		/* the user clients wants to log in as */
+		login_user = find_user(username);
+		if (login_user == NULL) {
+			disconnect_client(client, true, "No such user");
+			return false;
+		}
+	} else {
+		/* ignore requested user */
+		login_user = db->forced_user;
+		if (login_user == NULL) {
+			disconnect_client(client, true, "bouncer config error");
+			log_error("auth_type=any requires forced user");
+			return false;
+		}
+	}
+	client->auth_user = login_user;
+
+	/* pool user may be forced */
+	pool_user = db->forced_user ? db->forced_user : login_user;
+
+	client->pool = get_pool(db, pool_user);
+	if (client->pool == NULL) {
+		disconnect_client(client, true, "no mem for pool");
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Pick the pool for a client from the key/value pairs of its
+ * startup packet.
+ *
+ * Both "user" and "database" must be present.  The connection limit
+ * is enforced here, except for the "pgbouncer" console database.
+ * On any failure the client is disconnected and false is returned.
+ */
+static bool decide_startup_pool(PgSocket *client, MBuf *pkt)
+{
+	const char *dbname = NULL;
+	const char *username = NULL;
+	const char *key, *val;
+
+	/* pairs end at an empty key or when the packet runs out */
+	for (;;) {
+		key = mbuf_get_string(pkt);
+		if (key == NULL || key[0] == 0)
+			break;
+		val = mbuf_get_string(pkt);
+		if (val == NULL)
+			break;
+
+		if (!strcmp(key, "database"))
+			dbname = val;
+		else if (!strcmp(key, "user"))
+			username = val;
+	}
+
+	if (username == NULL) {
+		disconnect_client(client, true, "No username supplied");
+		return false;
+	}
+	if (dbname == NULL) {
+		disconnect_client(client, true, "No database supplied");
+		return false;
+	}
+	slog_debug(client, "login request: db=%s user=%s", dbname, username);
+
+	/*
+	 * check if limit allows, dont limit admin db
+	 * nb: new incoming conn will be attached to PgSocket, thus
+	 * get_active_client_count() counts it
+	 */
+	if (get_active_client_count() > cf_max_client_conn
+	    && strcmp(dbname, "pgbouncer") != 0)
+	{
+		disconnect_client(client, true, "no more conns allowed");
+		return false;
+	}
+
+	return set_pool(client, dbname, username);
+}
+
+/* the 64 characters a crypt salt is built from */
+static const char valid_crypt_salt[] =
+"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+/*
+ * Send the authentication request for the configured auth type.
+ *
+ * For crypt and md5 a fresh random salt is stored in client->salt and
+ * sent along; auth_type=any is announced to the client as trust.
+ */
+static bool send_client_authreq(PgSocket *client)
+{
+	int res;
+	int auth = cf_auth_type;
+	uint8 saltlen = 0;
+
+	switch (auth) {
+	case AUTH_CRYPT:
+		/* two random characters from the salt alphabet, NUL-terminated */
+		saltlen = 2;
+		get_random_bytes((void*)client->salt, saltlen);
+		client->salt[0] = valid_crypt_salt[client->salt[0] & 0x3f];
+		client->salt[1] = valid_crypt_salt[client->salt[1] & 0x3f];
+		client->salt[2] = 0;
+		break;
+	case AUTH_MD5:
+		/* four raw random bytes */
+		saltlen = 4;
+		get_random_bytes((void*)client->salt, saltlen);
+		break;
+	case AUTH_ANY:
+		auth = AUTH_TRUST;
+		break;
+	default:
+		break;
+	}
+
+	SEND_generic(res, client, 'R', "ib", auth, client->salt, saltlen);
+	return res;
+}
+
+/*
+ * Decide on packets of client in login phase.
+ *
+ * Handles the SSL request (always refused), the startup packet (pool
+ * selection, console access check, then either immediate login or an
+ * auth request), the password packet and the cancel request.
+ *
+ * Returns true when the packet was consumed and the client stays
+ * connected.  Returns false when the client was disconnected or handed
+ * over as a cancel request, or when finish_client_login() reported
+ * failure.
+ */
+static bool handle_client_startup(PgSocket *client, MBuf *pkt)
+{
+	unsigned pkt_type;
+	unsigned pkt_len;
+	const char *passwd;
+
+	SBuf *sbuf = &client->sbuf;
+
+	/* dont tolerate partial packets */
+	if (!get_header(pkt, &pkt_type, &pkt_len)) {
+		disconnect_client(client, true, "client sent bad pkt header");
+		return false;
+	}
+
+	if (client->wait_for_welcome) {
+		if (finish_client_login(client)) {
+			/* the packet was already parsed */
+			sbuf_prepare_skip(sbuf, pkt_len);
+			return true;
+		} else
+			return false;
+	}
+
+	slog_noise(client, "pkt='%c' len=%d",
+		   pkt_type < 256 ? pkt_type : '?', pkt_len);
+
+	switch (pkt_type) {
+	case PKT_SSLREQ:
+		log_noise("C: req SSL");
+		log_noise("P: nak");
+		sbuf_answer(&client->sbuf, "N", 1);
+		break;
+	case PKT_STARTUP:
+		if (mbuf_avail(pkt) < pkt_len - 8) {
+			disconnect_client(client, true, "client sent partial pkt in startup");
+			return false;
+		}
+		if (client->pool) {
+			disconnect_client(client, true, "client re-sent startup pkt");
+			return false;
+		}
+
+		if (!decide_startup_pool(client, pkt))
+			return false;
+
+		if (client->pool->admin) {
+			if (!admin_pre_login(client))
+				return false;
+		}
+
+		if (cf_auth_type <= AUTH_TRUST || client->own_user) {
+			if (!finish_client_login(client))
+				return false;
+		} else {
+			/* the send failed, so don't try to send an error either */
+			if (!send_client_authreq(client)) {
+				disconnect_client(client, false, "failed to send auth req");
+				return false;
+			}
+		}
+		break;
+	case 'p':		/* PasswordMessage */
+		if (mbuf_avail(pkt) < pkt_len - 5) {
+			disconnect_client(client, true, "client sent partial pkt in startup");
+			return false;
+		}
+
+		/* havent requested it */
+		if (cf_auth_type <= AUTH_TRUST) {
+			disconnect_client(client, true, "unreqested passwd pkt");
+			return false;
+		}
+
+		/*
+		 * No startup packet seen yet, so there is no user to check
+		 * the password against; check_client_passwd() would
+		 * dereference a NULL auth_user.
+		 */
+		if (!client->auth_user) {
+			disconnect_client(client, true, "client password pkt before startup packet");
+			return false;
+		}
+
+		passwd = mbuf_get_string(pkt);
+		if (passwd && check_client_passwd(client, passwd)) {
+			if (!finish_client_login(client))
+				return false;
+		} else {
+			disconnect_client(client, true, "Login failed");
+			return false;
+		}
+		break;
+	case PKT_CANCEL:
+		/* payload is exactly the 8-byte cancel key */
+		if (mbuf_avail(pkt) == 8) {
+			const uint8 *key = mbuf_get_bytes(pkt, 8);
+			memcpy(client->cancel_key, key, 8);
+			accept_cancel_request(client);
+		} else
+			disconnect_client(client, false, "bad cancel request");
+		return false;
+	default:
+		disconnect_client(client, false, "bad pkt");
+		return false;
+	}
+	sbuf_prepare_skip(sbuf, pkt_len);
+	client->request_time = get_cached_time();
+	return true;
+}
+
+/*
+ * Decide on packets of a logged-in client.
+ *
+ * Known protocol packets are forwarded to the linked server (or handed to
+ * admin_handle_client() for the admin pool); 'X' and unknown packet types
+ * disconnect the client.  Returns true only when the packet was scheduled
+ * for forwarding (or whatever admin_handle_client() returns).
+ *
+ * The switch relies on case fall-through: the first group sets 'flush'
+ * and then falls into the common forwarding code shared with the
+ * bufferable extended-protocol packets.
+ */
+static bool handle_client_work(PgSocket *client, MBuf *pkt)
+{
+ unsigned pkt_type;
+ unsigned pkt_len;
+ bool flush = 0;
+ SBuf *sbuf = &client->sbuf;
+
+ if (!get_header(pkt, &pkt_type, &pkt_len)) {
+ disconnect_client(client, true, "bad pkt header");
+ return false;
+ }
+ slog_noise(client, "pkt='%c' len=%d", pkt_type, pkt_len);
+
+ switch (pkt_type) {
+
+ /* request immediate response from server */
+ case 'H': /* Flush */
+ client->flush_req = 1;
+ /* fall through */
+ case 'S': /* Sync */
+ /* sync is followed by ReadyForQuery */
+
+ /* one-packet queries */
+ case 'Q': /* Query */
+ case 'F': /* FunctionCall */
+
+ /* copy end markers */
+ case 'c': /* CopyDone(F/B) */
+ case 'f': /* CopyFail(F/B) */
+
+ /* above packets should be sent ASAP */
+ flush = 1;
+ /* fall through into the common forwarding path */
+
+ /*
+ * extended protocol allows server (and thus pooler)
+ * to buffer packets until sync or flush is sent by client
+ */
+ case 'P': /* Parse */
+ case 'E': /* Execute */
+ case 'C': /* Close */
+ case 'B': /* Bind */
+ case 'D': /* Describe */
+ case 'd': /* CopyData(F/B) */
+
+ /* update stats: count the request once, when query_start is still unset */
+ if (!client->query_start) {
+ client->pool->stats.request_count++;
+ client->query_start = get_time_usec();
+ }
+
+ /* admin pool packets are handled by the admin console code, not forwarded */
+ if (client->pool->admin)
+ return admin_handle_client(client, pkt, pkt_type, pkt_len);
+
+ /* acquire server */
+ if (!find_server(client))
+ return false;
+
+ client->pool->stats.client_bytes += pkt_len;
+
+ /* tag the server as dirty */
+ client->link->ready = 0;
+
+ /* forward the packet */
+ sbuf_prepare_send(sbuf, &client->link->sbuf, pkt_len, flush);
+ break;
+
+ /* anything not listed above is a protocol error */
+ default:
+ slog_error(client, "unknown pkt from client: %d/0x%x", pkt_type, pkt_type);
+ disconnect_client(client, true, "unknown pkt");
+ return false;
+ /* client wants to go away */
+ case 'X': /* Terminate */
+ disconnect_client(client, false, "client close request");
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Callback from SBuf for client sockets.
+ *
+ * 'arg' is the PgSocket of the client; the 'sbuf' parameter itself is not
+ * used in the body.  Returns the result of the packet handler for
+ * SBUF_EV_READ, and false for every other event and for a header that has
+ * not fully arrived yet.
+ */
+bool client_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg)
+{
+ bool res = false;
+ PgSocket *client = arg;
+
+ Assert(!is_server_socket(client));
+ Assert(client->state != SV_FREE);
+
+ switch (evtype) {
+ case SBUF_EV_CONNECT_OK:
+ case SBUF_EV_CONNECT_FAILED:
+ /* ^ those should not happen */
+ /* fall through: treated the same as a failed receive */
+ case SBUF_EV_RECV_FAILED:
+ disconnect_client(client, false, "client unexpected eof");
+ break;
+ case SBUF_EV_SEND_FAILED:
+ /* NOTE(review): passes client->link unchecked -- confirm it is always set when this event fires */
+ disconnect_server(client->link, false, "Server connection closed");
+ break;
+ case SBUF_EV_READ:
+ /* fewer than 5 bytes available: wait for more before parsing the header */
+ if (mbuf_avail(pkt) < 5) {
+ log_noise("C: got partial header, trying to wait a bit");
+ return false;
+ }
+
+ client->request_time = get_cached_time();
+ switch (client->state) {
+ case CL_LOGIN:
+ res = handle_client_startup(client, pkt);
+ break;
+ case CL_ACTIVE:
+ /* an active client still waiting for the welcome goes through the startup handler */
+ if (client->wait_for_welcome)
+ res = handle_client_startup(client, pkt);
+ else
+ res = handle_client_work(client, pkt);
+ break;
+ case CL_WAITING:
+ fatal("why waiting client in client_proto()");
+ default:
+ fatal("bad client state: %d", client->state);
+ }
+ }
+ return res;
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* SBuf event callback for client sockets; 'arg' is the client's PgSocket */
+bool client_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg);
+/* presumably attaches the client to the pool for dbname/username -- verify against the definition */
+bool set_pool(PgSocket *client, const char *dbname, const char *username);
+
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Periodic maintenance.
+ */
+
+#include "bouncer.h"
+
+/* interval between do_full_maint() runs: 0 seconds plus USEC / 3 microseconds */
+static struct timeval full_maint_period = {0, USEC / 3};
+/* timer event that fires do_full_maint() */
+static struct event full_maint_ev;
+
+/* disconnect every server socket in the list, giving 'reason' for each */
+static void close_server_list(StatList *sk_list, const char *reason)
+{
+    List *cur, *next;
+
+    /* "safe" iteration keeps the successor, as the current entry is disconnected */
+    statlist_for_each_safe(cur, sk_list, next)
+        disconnect_server(container_of(cur, PgSocket, head), true, reason);
+}
+
+/*
+ * Pause every not-yet-suspended socket whose buffer is empty.
+ * Returns how many sockets could not be suspended because their buffer
+ * still holds data.
+ */
+static int suspend_socket_list(StatList *list)
+{
+    List *cur;
+    PgSocket *sock;
+    int busy = 0;
+
+    statlist_for_each(cur, list) {
+        sock = container_of(cur, PgSocket, head);
+        if (sock->suspended)
+            continue;
+        if (!sbuf_empty(&sock->sbuf)) {
+            /* still has buffered data, try again later */
+            busy++;
+            continue;
+        }
+        sbuf_pause(&sock->sbuf);
+        sock->suspended = 1;
+    }
+    return busy;
+}
+
+/* clear the suspended flag and continue I/O on every suspended socket in the list */
+static void resume_socket_list(StatList *list)
+{
+    List *cur, *next;
+    PgSocket *sock;
+
+    statlist_for_each_safe(cur, list, next) {
+        sock = container_of(cur, PgSocket, head);
+        if (!sock->suspended)
+            continue;
+        sock->suspended = 0;
+        sbuf_continue(&sock->sbuf);
+    }
+}
+
+/* walk all non-admin pools and resume their client and server sockets */
+static void resume_sockets(void)
+{
+    List *cur;
+    PgPool *p;
+
+    statlist_for_each(cur, &pool_list) {
+        p = container_of(cur, PgPool, head);
+        if (!p->admin) {
+            resume_socket_list(&p->active_client_list);
+            resume_socket_list(&p->active_server_list);
+            resume_socket_list(&p->idle_server_list);
+            resume_socket_list(&p->used_server_list);
+        }
+    }
+}
+
+/*
+ * Resume pools and listen sockets: first the suspended sockets of all
+ * pools, then the pooler itself via resume_pooler().
+ */
+void resume_all(void)
+{
+ resume_sockets();
+ resume_pooler();
+}
+
+/*
+ * Send test/reset query to the first server in the pool's used list,
+ * if needed.
+ *
+ * The check is skipped when no check query is configured or when the
+ * server was last used less than cf_server_check_delay ago; in that case
+ * the server goes straight to SV_IDLE.  Otherwise the server is moved to
+ * SV_TESTED and the query is sent; a failed send disconnects the server.
+ * Does nothing when the used list is empty.
+ */
+static void launch_recheck(PgPool *pool)
+{
+    const char *q = cf_server_check_query;
+    bool need_check = true;
+    PgSocket *server;
+    bool res = true;
+
+    server = first_socket(&pool->used_server_list);
+
+    /* nothing to recheck; also avoids dereferencing NULL below */
+    if (!server)
+        return;
+
+    /* is the check needed? */
+    if (q == NULL || q[0] == 0)
+        need_check = false;
+    else if (cf_server_check_delay > 0) {
+        usec_t now = get_cached_time();
+        if (now - server->request_time < cf_server_check_delay)
+            need_check = false;
+    }
+
+    if (need_check) {
+        /* send test query, wait for result */
+        change_server_state(server, SV_TESTED);
+        SEND_generic(res, server, 'Q', "s", q);
+        if (!res)
+            disconnect_server(server, false, "test query failed");
+    } else
+        /* make immediately available */
+        change_server_state(server, SV_IDLE);
+}
+
+/*
+ * make servers available
+ *
+ * Walks the waiting clients of the pool.  While idle servers exist the
+ * clients are activated one by one; as soon as no idle server is left,
+ * rechecks and/or new connections are launched and the walk stops.
+ */
+static void per_loop_activate(PgPool *pool)
+{
+ List *item, *tmp;
+ PgSocket *client;
+
+ /* see if any server have been freed */
+ statlist_for_each_safe(item, &pool->waiting_client_list, tmp) {
+ client = container_of(item, PgSocket, head);
+ if (!statlist_empty(&pool->idle_server_list)) {
+
+ /* db not fully initialized after reboot */
+ if (client->wait_for_welcome && !pool->db->welcome_msg_ready) {
+ launch_new_connection(pool);
+ continue;
+ }
+
+ /* there is a ready server already */
+ activate_client(client);
+ } else if (!statlist_empty(&pool->tested_server_list)) {
+ /* some connections are in testing process */
+
+ /* not enough connections? (X) */
+ launch_new_connection(pool);
+ break;
+ } else if (!statlist_empty(&pool->used_server_list)) {
+ /* ask for more connections to be tested */
+ launch_recheck(pool);
+
+ /* not enough connections? (X) */
+ launch_new_connection(pool);
+ break;
+ } else {
+ /* not enough connections */
+ launch_new_connection(pool);
+ break;
+ }
+ }
+}
+/*
+ * (X) - there is a problem under light load with a small server_check_timeout,
+ * where a waiting connection won't ever get a server connection.
+ */
+
+/*
+ * Pause mode handling for one pool: close all idle, used and new server
+ * connections.  Returns the number of servers that are still active or
+ * being tested; always 0 for the admin pool.
+ */
+static int per_loop_pause(PgPool *pool)
+{
+    if (pool->admin)
+        return 0;
+
+    close_server_list(&pool->idle_server_list, "pause mode");
+    close_server_list(&pool->used_server_list, "pause mode");
+    close_server_list(&pool->new_server_list, "pause mode");
+
+    /* whatever is left busy keeps the pause from completing */
+    return statlist_count(&pool->active_server_list)
+         + statlist_count(&pool->tested_server_list);
+}
+
+/*
+ * suspend active clients and servers
+ *
+ * Returns the number of sockets in this pool that could not be suspended
+ * yet; always 0 for the admin pool.  Servers are touched only after every
+ * client of the pool is suspended and nobody is waiting.
+ */
+static int per_loop_suspend(PgPool *pool)
+{
+ int active = 0;
+
+ if (pool->admin)
+ return 0;
+
+ active += suspend_socket_list(&pool->active_client_list);
+
+ /* waiting clients count as active; keep trying to hand them servers */
+ if (!statlist_empty(&pool->waiting_client_list)) {
+ active += statlist_count(&pool->waiting_client_list);
+ per_loop_activate(pool);
+ }
+
+ if (!active) {
+ active += suspend_socket_list(&pool->active_server_list);
+ active += suspend_socket_list(&pool->idle_server_list);
+ active += statlist_count(&pool->tested_server_list);
+
+ /* as all clients are done, no need for them */
+ close_server_list(&pool->used_server_list, "close unsafe fds on suspend");
+ }
+
+ return active;
+}
+
+/*
+ * this function is called for each event loop.
+ *
+ * Depending on cf_pause_mode (0, 1 or 2) each non-admin pool is activated,
+ * paused or suspended.  In modes 1 and 2 admin_pause_done() is called
+ * once nothing is left active.
+ */
+void per_loop_object_maint(void)
+{
+ List *item;
+ PgPool *pool;
+ int active = 0;
+
+ statlist_for_each(item, &pool_list) {
+ pool = container_of(item, PgPool, head);
+ if (pool->admin)
+ continue;
+ switch (cf_pause_mode) {
+ case 0:
+ per_loop_activate(pool);
+ break;
+ case 1:
+ active += per_loop_pause(pool);
+ break;
+ case 2:
+ active += per_loop_suspend(pool);
+ break;
+ }
+ }
+
+ switch (cf_pause_mode) {
+ case 2:
+ /* in mode 2, clients still logging in also count as active */
+ active += statlist_count(&login_client_list);
+ /* fall through */
+ case 1:
+ if (!active)
+ admin_pause_done();
+ /* fall through */
+ default:
+ break;
+ }
+}
+
+/*
+ * Maintain the clients of one pool: enforce client_idle_timeout on active
+ * clients that have no server linked, and query_timeout on waiting clients.
+ */
+static void pool_client_maint(PgPool *pool)
+{
+    List *cur, *next;
+    PgSocket *cl;
+    usec_t started, elapsed;
+    usec_t now = get_cached_time();
+
+    /* force client_idle_timeout */
+    if (cf_client_idle_timeout > 0) {
+        statlist_for_each_safe(cur, &pool->active_client_list, next) {
+            cl = container_of(cur, PgSocket, head);
+            Assert(cl->state == CL_ACTIVE);
+            /* only clients without a linked server can be idle */
+            if (!cl->link && now - cl->request_time > cf_client_idle_timeout)
+                disconnect_client(cl, true, "idle_timeout");
+        }
+    }
+
+    /* force client_query_timeout */
+    if (cf_query_timeout > 0) {
+        statlist_for_each_safe(cur, &pool->waiting_client_list, next) {
+            cl = container_of(cur, PgSocket, head);
+            Assert(cl->state == CL_WAITING);
+
+            /* fall back to the last request time when no query start was recorded */
+            started = cl->query_start;
+            if (started == 0) {
+                log_warning("query_start==0");
+                started = cl->request_time;
+            }
+            elapsed = now - started;
+
+            if (elapsed > cf_query_timeout)
+                disconnect_client(cl, true, "query_timeout");
+        }
+    }
+}
+
+/*
+ * Go over a list of currently unused servers and close those that should
+ * not be kept: config changed, idle too long, lifetime over, or pause mode.
+ *
+ * When 'idle_test' is set and a check query is configured, servers that
+ * stayed idle longer than cf_server_check_delay are moved to SV_USED.
+ */
+static void check_unused_servers(StatList *slist, usec_t now, bool idle_test)
+{
+    List *item, *tmp;
+    usec_t idle, age;
+    PgSocket *server;
+
+    /* disconnect idle servers if needed */
+    statlist_for_each_safe(item, slist, tmp) {
+        server = container_of(item, PgSocket, head);
+
+        age = now - server->connect_time;
+        idle = now - server->request_time;
+
+        if (server->close_needed)
+            disconnect_server(server, true, "db conf changed");
+        else if (cf_server_idle_timeout > 0 && idle > cf_server_idle_timeout)
+            disconnect_server(server, true, "server idle timeout");
+        else if (cf_server_lifetime > 0 && age > cf_server_lifetime)
+            disconnect_server(server, true, "server lifetime over");
+        else if (cf_pause_mode == 1)
+            disconnect_server(server, true, "pause mode");
+        /* an unset (NULL) check query means "no check", same as an empty one */
+        else if (idle_test && cf_server_check_query && *cf_server_check_query) {
+            if (idle > cf_server_check_delay)
+                change_server_state(server, SV_USED);
+        }
+    }
+}
+
+/*
+ * Compare the number of server connections with the configured pool size
+ * and close the surplus, taking victims from the used list first and the
+ * idle list second.  This makes the pooler react faster when the admin
+ * has decreased the pool size.
+ */
+static void check_pool_size(PgPool *pool)
+{
+    PgSocket *victim;
+    int total, excess;
+
+    /*
+     * new_server_list is deliberately left out: a cancel pkt may create
+     * a new srv conn without taking pool_size into account.
+     */
+    total = statlist_count(&pool->active_server_list)
+          + statlist_count(&pool->idle_server_list)
+          + statlist_count(&pool->used_server_list)
+          + statlist_count(&pool->tested_server_list);
+    excess = total - pool->db->pool_size;
+
+    Assert(pool->db->pool_size >= 0);
+
+    for (; excess > 0; excess--) {
+        victim = first_socket(&pool->used_server_list);
+        if (victim == NULL)
+            victim = first_socket(&pool->idle_server_list);
+        /* only active/tested servers left, nothing can be closed now */
+        if (victim == NULL)
+            break;
+        disconnect_server(victim, true, "too many servers in pool");
+    }
+}
+
+/*
+ * Maintain servers in a pool: close unused servers that expired, enforce
+ * query_timeout on busy servers and server_connect_timeout on servers
+ * that are still logging in, then trim the pool to its configured size.
+ */
+static void pool_server_maint(PgPool *pool)
+{
+ List *item, *tmp;
+ usec_t age, now = get_cached_time();
+ PgSocket *server;
+
+ /* find and disconnect idle servers */
+ check_unused_servers(&pool->used_server_list, now, 0);
+ check_unused_servers(&pool->tested_server_list, now, 0);
+ check_unused_servers(&pool->idle_server_list, now, 1);
+
+ /* servers whose query did not get an answer within query_timeout */
+ if (cf_query_timeout > 0) {
+ statlist_for_each_safe(item, &pool->active_server_list, tmp) {
+ server = container_of(item, PgSocket, head);
+ Assert(server->state == SV_ACTIVE);
+ if (server->ready)
+ continue;
+ /* age is measured from the linked client's last request */
+ age = now - server->link->request_time;
+ if (age > cf_query_timeout)
+ disconnect_server(server, true, "statement timeout");
+ }
+ }
+
+ /* find connections that got connect, but could not log in */
+ if (cf_server_connect_timeout > 0) {
+ statlist_for_each_safe(item, &pool->new_server_list, tmp) {
+ server = container_of(item, PgSocket, head);
+ Assert(server->state == SV_LOGIN);
+
+ age = now - server->connect_time;
+ if (age > cf_server_connect_timeout)
+ disconnect_server(server, true, "connect timeout");
+ }
+ }
+
+ check_pool_size(pool);
+}
+
+/*
+ * Full-scale maintenance, done only occasionally.
+ *
+ * Timer callback (the arguments are not used): maintains servers and
+ * clients of every non-admin pool, exits the process once a requested
+ * shutdown has no active servers left, re-checks the user file and
+ * re-arms the timer.
+ */
+static void do_full_maint(int sock, short flags, void *arg)
+{
+    List *cur;
+    PgPool *p;
+
+    statlist_for_each(cur, &pool_list) {
+        p = container_of(cur, PgPool, head);
+        if (!p->admin) {
+            pool_server_maint(p);
+            pool_client_maint(p);
+        }
+    }
+
+    if (cf_shutdown) {
+        if (get_active_server_count() == 0) {
+            log_info("server connections dropped, exiting");
+            exit(0);
+        }
+    }
+
+    loader_users_check();
+
+    /* schedule the next run */
+    evtimer_add(&full_maint_ev, &full_maint_period);
+}
+
+/* first-time initialization: register do_full_maint() as timer callback and arm the timer */
+void janitor_setup(void)
+{
+ /* launch maintenance */
+ evtimer_set(&full_maint_ev, do_full_maint, NULL);
+ evtimer_add(&full_maint_ev, &full_maint_period);
+}
+
+/*
+ * The [pgbouncer] section can be loaded after the databases, so the
+ * databases need a second look: every database that still has a negative
+ * pool_size gets cf_default_pool_size.
+ */
+void config_postprocess(void)
+{
+    List *cur;
+    PgDatabase *database;
+
+    statlist_for_each(cur, &database_list) {
+        database = container_of(cur, PgDatabase, head);
+        if (database->pool_size >= 0)
+            continue;
+        database->pool_size = cf_default_pool_size;
+    }
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* arm the periodic maintenance timer */
+void janitor_setup(void);
+/* give databases without an explicit pool_size the default one */
+void config_postprocess(void);
+/* resume suspended sockets and the pooler */
+void resume_all(void);
+/* per-event-loop maintenance: activate, pause or suspend pools */
+void per_loop_object_maint(void);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Circular doubly linked list implementation.
+ *
+ * Basic idea from <linux/list.h>.
+ *
+ * <sys/queue.h> seemed usable, but overcomplicated.
+ */
+
+#ifndef __LIST_H_
+#define __LIST_H_
+
+/* turn on slow checking (list membership test in statlist_remove) when CASSERT is set */
+#if defined(CASSERT) && !defined(LIST_DEBUG)
+#define LIST_DEBUG
+#endif
+
+/*
+ * give offset of a field inside struct
+ *
+ * Fallback used only when offsetof is not already defined as a macro.
+ * NOTE(review): the cast to unsigned may narrow the pointer where
+ * pointers are wider than unsigned.
+ */
+#ifndef offsetof
+#define offsetof(type, field) ((unsigned)&(((type *)0)->field))
+#endif
+
+/* given pointer to field inside struct, return pointer to struct */
+#ifndef container_of
+#define container_of(ptr, type, field) ((type *)((char *)(ptr) - offsetof(type, field)))
+#endif
+
+/*
+ * list type
+ *
+ * The same struct serves as list head and as the link embedded in items.
+ * An empty list, or an item that is not on any list, points to itself.
+ */
+typedef struct List List;
+struct List {
+ List *next;
+ List *prev;
+};
+
+/* define a list head variable that starts out empty */
+#define LIST(var) List var = { &var, &var }
+
+/* make the node point at itself in both directions, i.e. an empty list */
+static inline void list_init(List *list)
+{
+    list->prev = list;
+    list->next = list;
+}
+
+/* is list empty?  true when the head's next pointer refers back to the head */
+static inline bool list_empty(List *list)
+{
+ return list->next == list;
+}
+
+/* link item in right after the list head; item must not be on a list; returns item */
+static inline List *list_prepend(List *item, List *list)
+{
+    List *old_first;
+
+    Assert(list_empty(item));
+
+    old_first = list->next;
+    item->next = old_first;
+    item->prev = list;
+    old_first->prev = item;
+    list->next = item;
+    return item;
+}
+
+/* link item in right before the list head, i.e. as last element; returns item */
+static inline List *list_append(List *item, List *list)
+{
+    List *old_last;
+
+    Assert(list_empty(item));
+
+    old_last = list->prev;
+    item->next = list;
+    item->prev = old_last;
+    old_last->next = item;
+    list->prev = item;
+    return item;
+}
+
+/* unlink item from its neighbours and leave it self-linked; returns item */
+static inline List *list_del(List *item)
+{
+    List *before = item->prev;
+    List *after = item->next;
+
+    before->next = after;
+    after->prev = before;
+
+    item->prev = item;
+    item->next = item;
+    return item;
+}
+
+/* unlink and return the first element, or NULL when the list is empty */
+static inline List *list_pop(List *list)
+{
+    return list_empty(list) ? NULL : list_del(list->next);
+}
+
+/* return first element without removing it, or NULL when the list is empty */
+static inline List *list_first(List *list)
+{
+ if (list_empty(list))
+ return NULL;
+ return list->next;
+}
+
+/*
+ * remove first elem from list and return with casting
+ *
+ * Evaluates to NULL for an empty list, otherwise to a pointer to the
+ * struct of type 'typ' whose member 'field' is the removed node.
+ */
+#define list_pop_type(list, typ, field) \
+ (list_empty(list) ? NULL \
+ : container_of(list_del((list)->next), typ, field))
+
+/* loop over list; the current item must not be unlinked inside the loop body */
+#define list_for_each(item, list) \
+ for ((item) = (list)->next; \
+ (item) != (list); \
+ (item) = (item)->next)
+
+/* loop over list and allow removing item: 'tmp' always holds the successor of 'item' */
+#define list_for_each_safe(item, list, tmp) \
+ for ((item) = (list)->next, (tmp) = (list)->next->next; \
+ (item) != (list); \
+ (item) = (tmp), (tmp) = (tmp)->next)
+
+/* linear search: is 'item' one of the elements linked into 'list'? */
+static inline bool item_in_list(List *item, List *list)
+{
+    List *cur = list->next;
+
+    while (cur != list) {
+        if (cur == item)
+            return 1;
+        cur = cur->next;
+    }
+    return 0;
+}
+
+
+/*
+ * wrapper for List that keeps track of number of items
+ */
+
+typedef struct StatList StatList;
+struct StatList {
+ List head; /* the list itself */
+ int cur_count; /* number of items currently linked */
+ int max_count; /* highest cur_count seen since the last statlist_reset() */
+ const char *name; /* list name, used in the debug check of statlist_remove() */
+};
+
+/* define an empty StatList variable, named after the variable itself */
+#define STATLIST(var) StatList var = { {&var.head, &var.head}, 0, 0, #var }
+
+/* restart high-water-mark tracking from the current item count */
+static inline void statlist_reset(StatList *list)
+{
+ list->max_count = list->cur_count;
+}
+
+/* put item at the start of the list and update the counters */
+static inline void statlist_prepend(List *item, StatList *list)
+{
+    list_prepend(item, &list->head);
+    list->cur_count += 1;
+    if (list->max_count < list->cur_count)
+        list->max_count = list->cur_count;
+}
+
+/* put item at the end of the list and update the counters */
+static inline void statlist_append(List *item, StatList *list)
+{
+    list_append(item, &list->head);
+    list->cur_count += 1;
+    if (list->max_count < list->cur_count)
+        list->max_count = list->cur_count;
+}
+
+/*
+ * Insert item directly before 'pos' and update the counters of 'list'.
+ * Appending to 'pos' as if it were a list head links the item in front
+ * of it.
+ */
+static inline void statlist_put_before(List *item, StatList *list, List *pos)
+{
+    list_append(item, pos);
+    list->cur_count += 1;
+    if (list->max_count < list->cur_count)
+        list->max_count = list->cur_count;
+}
+
+/*
+ * Unlink item from the list and decrement the item count.
+ * With LIST_DEBUG defined, first verifies by linear search that the item
+ * really is on this list and calls fatal() if it is not.
+ */
+static inline void statlist_remove(List *item, StatList *list)
+{
+#ifdef LIST_DEBUG
+ /* sanity check */
+ if (!item_in_list(item, &list->head))
+ fatal("item in wrong list, expected: %s", list->name);
+#endif
+
+ list_del(item);
+ list->cur_count--;
+
+ Assert(list->cur_count >= 0);
+}
+
+/* set up an empty list with the given name and zeroed counters */
+static inline void statlist_init(StatList *list, const char *name)
+{
+    list->name = name;
+    list->cur_count = 0;
+    list->max_count = 0;
+    list_init(&list->head);
+}
+
+/* number of items on the list; asserts that a non-positive count goes with an empty list */
+static inline int statlist_count(StatList *list)
+{
+ Assert(list->cur_count > 0 || list_empty(&list->head));
+ return list->cur_count;
+}
+
+/* larger of the recorded high-water mark and the current item count */
+static inline int statlist_max(StatList *list)
+{
+    if (list->max_count > list->cur_count)
+        return list->max_count;
+    return list->cur_count;
+}
+
+/* unlink and return the first item (NULL if the list is empty), keeping the count in sync */
+static inline List *statlist_pop(StatList *list)
+{
+    List *popped = list_pop(&list->head);
+
+    if (popped != NULL)
+        --list->cur_count;
+
+    Assert(list->cur_count >= 0);
+
+    return popped;
+}
+
+/* first item of the list without removing it, or NULL when empty */
+static inline List *statlist_first(StatList *list)
+{
+ return list_first(&list->head);
+}
+
+/* true when no item is linked into the list */
+static inline bool statlist_empty(StatList *list)
+{
+ return list_empty(&list->head);
+}
+
+/* iteration over a StatList: same as the plain list loops, applied to the embedded head */
+#define statlist_for_each(item, list) list_for_each(item, &((list)->head))
+#define statlist_for_each_safe(item, list, tmp) list_for_each_safe(item, &((list)->head), tmp)
+
+#endif /* __LIST_H_ */
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Config and pg_auth file reading.
+ */
+
+#include "bouncer.h"
+
+/*
+ * ConnString parsing
+ */
+
+/*
+ * Locate the next key=val pair in a connstring.
+ *
+ * Leading spaces are skipped.  The key runs up to the first '=' or space,
+ * the value from just after the '=' (if present) up to the next space.
+ * Start and length of both parts are stored through the out parameters;
+ * the input string is not modified.  Returns a pointer to the first
+ * non-space character after the value.
+ */
+static char * getpair(char *p,
+                      char **key_p, int *key_len,
+                      char **val_p, int *val_len)
+{
+    /* key */
+    p += strspn(p, " ");
+    *key_p = p;
+    p += strcspn(p, "= ");
+    *key_len = p - *key_p;
+
+    /* optional separator */
+    if (*p == '=')
+        p++;
+
+    /* value */
+    *val_p = p;
+    p += strcspn(p, " ");
+    *val_len = p - *val_p;
+
+    /* trailing spaces */
+    return p + strspn(p, " ");
+}
+
+/*
+ * fill PgDatabase from connstr
+ *
+ * 'name' is the database name clients use; 'connstr' is a space-separated
+ * list of key=val pairs and is modified in place (NUL terminators are
+ * written after each key and value).  On any parse or setup problem the
+ * database is skipped with a log message.  If the database already exists
+ * and its connection settings differ, it is tagged dirty.
+ */
+void parse_database(char *name, char *connstr)
+{
+ char *p, *key, *val;
+ int klen, vlen;
+ PktBuf buf;
+ PgDatabase *db;
+ int pool_size = -1;
+
+ /* defaults used when the connstring does not mention a setting */
+ char *dbname = name;
+ char *host = NULL;
+ char *port = "5432";
+ char *username = NULL;
+ char *password = "";
+ char *client_encoding = NULL;
+ char *datestyle = NULL;
+
+ in_addr_t v_addr = INADDR_NONE;
+ int v_port;
+
+ p = connstr;
+ while (*p) {
+ p = getpair(p, &key, &klen, &val, &vlen);
+ /* an empty key or value silently ends the parsing */
+ if (*key == 0 || *val == 0 || klen == 0 || vlen == 0)
+ break;
+ /* terminate key and value in place so they can be used as C strings */
+ key[klen] = 0;
+ val[vlen] = 0;
+
+ if (strcmp("dbname", key) == 0)
+ dbname = val;
+ else if (strcmp("host", key) == 0)
+ host = val;
+ else if (strcmp("port", key) == 0)
+ port = val;
+ else if (strcmp("user", key) == 0)
+ username = val;
+ else if (strcmp("password", key) == 0)
+ password = val;
+ else if (strcmp("client_encoding", key) == 0)
+ client_encoding = val;
+ else if (strcmp("datestyle", key) == 0)
+ datestyle = val;
+ else if (strcmp("pool_size", key) == 0)
+ pool_size = atoi(val);
+ else {
+ log_error("skipping database %s because"
+ " of bad connstring: %s", name, connstr);
+ return;
+ }
+ }
+
+ /* no host means unix socket, which needs a configured socket dir */
+ if (!host) {
+ if (!cf_unix_socket_dir) {
+ log_error("skipping database %s because"
+ " unix socket not configured", name);
+ return;
+ }
+ } else {
+ /* host is parsed with inet_addr() only */
+ v_addr = inet_addr(host);
+ if (v_addr == INADDR_NONE) {
+ log_error("skipping database %s because"
+ " of bad host: %s", name, host);
+ return;
+ }
+ }
+ v_port = atoi(port);
+ if (v_port == 0) {
+ log_error("skipping database %s because"
+ " of bad port: %s", name, port);
+ return;
+ }
+
+ db = add_database(name);
+ if (!db) {
+ log_error("cannot create database, no mem?");
+ return;
+ }
+
+ /* db->dbname already set: the database existed before, look for changed settings */
+ if (db->dbname) {
+ bool changed = false;
+ if (strcmp(db->dbname, dbname) != 0)
+ changed = true;
+ else if (host && db->addr.is_unix)
+ changed = true;
+ else if (!host && !db->addr.is_unix)
+ changed = true;
+ else if (host && v_addr != db->addr.ip_addr.s_addr)
+ changed = true;
+ else if (v_port != db->addr.port)
+ changed = true;
+ else if (username && !db->forced_user)
+ changed = true;
+ else if (username && strcmp(username, db->forced_user->name))
+ changed = true;
+ else if (!username && db->forced_user)
+ changed = true;
+
+ if (changed)
+ tag_database_dirty(db);
+ }
+
+ /* if pool_size < 0 it will be set later */
+ db->pool_size = pool_size;
+ db->addr.port = v_port;
+ db->addr.ip_addr.s_addr = v_addr;
+ db->addr.is_unix = host ? 0 : 1;
+
+ /* build the startup parameter block inside db->startup_params */
+ pktbuf_static(&buf, db->startup_params, sizeof(db->startup_params));
+
+ pktbuf_put_string(&buf, "database");
+ /* db->dbname points into the parameter block, at the value written next */
+ db->dbname = (char *)db->startup_params + pktbuf_written(&buf);
+ pktbuf_put_string(&buf, dbname);
+
+ if (client_encoding) {
+ pktbuf_put_string(&buf, "client_encoding");
+ pktbuf_put_string(&buf, client_encoding);
+ }
+
+ if (datestyle) {
+ pktbuf_put_string(&buf, "datestyle");
+ pktbuf_put_string(&buf, datestyle);
+ }
+
+ db->startup_params_len = pktbuf_written(&buf);
+
+ /* if user is forced, create fake object for it */
+ if (username != NULL) {
+ if (!force_user(db, username, password))
+ log_warning("db setup failed, trying to continue");
+ } else if (db->forced_user)
+ log_warning("losing forced user not supported,"
+ " keeping old setting");
+}
+
+/*
+ * User file parsing
+ */
+
+/*
+ * Find the next unescaped " in the string.
+ *
+ * A backslash followed by any character is skipped as a pair.  Returns a
+ * pointer to the quote, or to the terminating NUL, or to a lone backslash
+ * that is the last character of the string.
+ */
+static char *find_quote(char *p)
+{
+    for (;;) {
+        /* run up to the next interesting character */
+        while (*p != 0 && *p != '\\' && *p != '"')
+            p++;
+
+        /* quote, end of string, or backslash with nothing after it */
+        if (*p != '\\' || p[1] == 0)
+            return p;
+
+        /* step over the escape pair */
+        p += 2;
+    }
+}
+
+/*
+ * Copy src to dst, removing backslash escapes on the way.
+ *
+ * A backslash makes the following character literal, so \" becomes " and
+ * \\ becomes a single backslash; a lone backslash at the very end is
+ * dropped.  At most len - 1 characters are written and dst is always
+ * NUL-terminated; nothing is written when len is not positive.
+ */
+static void copy_quoted(char *dst, const char *src, int len)
+{
+    char *end;
+
+    /* no room even for the terminator */
+    if (len <= 0)
+        return;
+
+    end = dst + len - 1;
+    while (*src && dst < end) {
+        if (*src == '\\') {
+            /* drop the escape char, take the next one as-is */
+            src++;
+            if (!*src)
+                break;
+        }
+        *dst++ = *src++;
+    }
+    *dst = 0;
+}
+
+/* strip the escapes from username and password, then register the user */
+static void unquote_add_user(const char *username, const char *password)
+{
+    char plain_user[MAX_USERNAME];
+    char plain_passwd[MAX_PASSWORD];
+
+    copy_quoted(plain_user, username, sizeof(plain_user));
+    copy_quoted(plain_passwd, password, sizeof(plain_passwd));
+
+    if (add_user(plain_user, plain_passwd) == NULL)
+        log_warning("cannot create user, no mem");
+}
+
+/*
+ * Tell whether the file 'fn' is unchanged since the previous call.
+ *
+ * The stat() result of the last inspected file is cached.  Returns true
+ * when device, inode, mode, owner, group, mtime and size all match the
+ * cached values; otherwise the cache is refreshed and false is returned.
+ * Also returns false when stat() fails.  Calling with fn == NULL clears
+ * the cache, so the next check reports a change.
+ */
+static bool auth_loaded(const char *fn)
+{
+    static struct stat cache;
+    struct stat cur;
+
+    /* hack for resetting */
+    if (fn == NULL) {
+        memset(&cache, 0, sizeof(cache));
+        return false;
+    }
+
+    if (stat(fn, &cur) < 0)
+        return false;
+
+    /* compare like with like: uid against uid, gid against gid */
+    if (cache.st_dev == cur.st_dev
+        && cache.st_ino == cur.st_ino
+        && cache.st_mode == cur.st_mode
+        && cache.st_uid == cur.st_uid
+        && cache.st_gid == cur.st_gid
+        && cache.st_mtime == cur.st_mtime
+        && cache.st_size == cur.st_size)
+        return true;
+    cache = cur;
+    return false;
+}
+
+/*
+ * Reload the auth file (cf_auth_file) if auth_loaded() reports that it
+ * changed since the last check.  Returns true when nothing changed,
+ * otherwise the result of load_auth_file().
+ */
+bool loader_users_check(void)
+{
+ if (auth_loaded(cf_auth_file))
+ return true;
+
+ return load_auth_file(cf_auth_file);
+}
+
+/*
+ * Load list of users from pg_auth/pg_psw file.
+ *
+ * Each line is expected to start with "username" "password", both in
+ * double quotes; the rest of the line is ignored.  Users are registered
+ * through unquote_add_user().
+ *
+ * Returns false only when the file could not be loaded (the cached file
+ * info is then reset).  A malformed line is logged and stops the parsing,
+ * but users read up to that point are kept and true is still returned.
+ */
+bool load_auth_file(const char *fn)
+{
+    char *user, *password, *buf, *p;
+
+    buf = load_file(fn);
+    if (buf == NULL) {
+        /* reset file info */
+        auth_loaded(NULL);
+        return false;
+    }
+
+    p = buf;
+    while (*p) {
+        /*
+         * skip whitespace and empty lines; the cast keeps isspace()
+         * well-defined for bytes with the high bit set
+         */
+        while (*p && isspace((unsigned char)*p)) p++;
+        if (!*p)
+            break;
+
+        /* start of line */
+        if (*p != '"') {
+            log_error("broken auth file");
+            break;
+        }
+        user = ++p;
+        p = find_quote(p);
+        if (*p != '"') {
+            log_error("broken auth file");
+            break;
+        }
+        if (p - user >= MAX_USERNAME) {
+            log_error("too long username");
+            break;
+        }
+        *p++ = 0; /* tag username end */
+
+        /* get password: first its opening quote, then its closing quote */
+        p = find_quote(p);
+        if (*p != '"') {
+            log_error("broken auth file");
+            break;
+        }
+        password = ++p;
+        p = find_quote(p);
+        if (*p != '"') {
+            log_error("broken auth file");
+            break;
+        }
+        if (p - password >= MAX_PASSWORD) {
+            log_error("too long password");
+            break;
+        }
+        *p++ = 0; /* tag password end */
+
+        /* send them away */
+        unquote_add_user(user, password);
+
+        /* skip rest of the line */
+        while (*p && *p != '\n') p++;
+    }
+    free(buf);
+
+    create_auth_cache();
+
+    return true;
+}
+
+/*
+ * INI file parser
+ */
+
+/*
+ * Config setter for int values.  The value must start with a decimal
+ * digit, otherwise an error is reported via admin_error() and false is
+ * returned.  The number itself is converted with atoi().
+ */
+bool cf_set_int(ConfElem *elem, const char *val, PgSocket *console)
+{
+    int *target = elem->dst;
+    bool starts_with_digit = (*val >= '0' && *val <= '9');
+
+    if (!starts_with_digit) {
+        admin_error(console, "bad value: %s", val);
+        return false;
+    }
+
+    *target = atoi(val);
+    return true;
+}
+
+/*
+ * Config getter for int values.  The text is formatted into a static
+ * buffer, so the result is overwritten by the next call.
+ */
+const char *cf_get_int(ConfElem *elem)
+{
+    static char numbuf[32];
+    const int *src = elem->dst;
+
+    sprintf(numbuf, "%d", *src);
+    return numbuf;
+}
+/*
+ * Config setter for time values: the text is parsed as a number, scaled
+ * by USEC and stored as usec_t.  The value must start with a decimal
+ * digit, otherwise an error is reported and false is returned.
+ */
+bool cf_set_time(ConfElem *elem, const char *val, PgSocket *console)
+{
+    usec_t *target = elem->dst;
+    bool starts_with_digit = (*val >= '0' && *val <= '9');
+
+    if (!starts_with_digit) {
+        admin_error(console, "bad value: %s", val);
+        return false;
+    }
+
+    *target = USEC * (usec_t)atoi(val);
+    return true;
+}
+
+/*
+ * Config getter for time values: the stored usec_t is divided by USEC and
+ * printed as an int.  Uses a static buffer that is overwritten by the
+ * next call.
+ */
+const char *cf_get_time(ConfElem *elem)
+{
+    static char numbuf[32];
+    const usec_t *src = elem->dst;
+    int scaled = (int)(*src / USEC);
+
+    sprintf(numbuf, "%d", scaled);
+    return numbuf;
+}
+
+/*
+ * Config setter for string values.
+ *
+ * Stores a private copy of 'val'.  Nothing happens when the value is
+ * unchanged.  Returns false when the copy cannot be allocated; in that
+ * case the previous value stays in place and remains valid.
+ */
+bool cf_set_str(ConfElem *elem, const char *val, PgSocket *console)
+{
+    char **str_p = elem->dst;
+    char *tmp;
+
+    /* dont touch if not changed */
+    if (*str_p && strcmp(*str_p, val) == 0)
+        return true;
+
+    /* copy first, so a failed allocation cannot leave a freed pointer behind */
+    tmp = strdup(val);
+    if (!tmp)
+        return false;
+
+    /* if dynamically allocated, free it */
+    if (elem->allocated)
+        free(*str_p);
+
+    *str_p = tmp;
+    elem->allocated = true;
+    return true;
+}
+
+/* Getter for string parameters: hands out the stored pointer itself, no copy. */
+const char * cf_get_str(ConfElem *elem)
+{
+	char **str_p = elem->dst;
+
+	return *str_p;
+}
+
+/*
+ * Look up key in elem_list (terminated by an entry with NULL name)
+ * and pass val to the setter of the first matching entry.
+ *
+ * Names are compared case-insensitively.  With reload set, entries
+ * that are not reloadable are refused.  Returns the setter's result,
+ * or false when the key is unknown or refused.
+ */
+bool set_config_param(ConfElem *elem_list,
+		      const char *key, const char *val,
+		      bool reload, PgSocket *console)
+{
+	ConfElem *desc = elem_list;
+
+	/* walk forward until the name matches or the list ends */
+	while (desc->name && strcasecmp(key, desc->name) != 0)
+		desc++;
+
+	if (!desc->name) {
+		admin_error(console, "unknown config parameter: %s", key);
+		return false;
+	}
+
+	/* refuse params that cannot be changed on reload */
+	if (reload && !desc->reloadable) {
+		if (console)
+			admin_error(console,
+				    "%s cannot be changed online", key);
+		return false;
+	}
+
+	return desc->io.fn_set(desc, val, console);
+}
+
+/*
+ * Deliver one key/value pair to the current section.
+ *
+ * Pairs outside a known section are dropped.  Sections with a data
+ * callback get the raw pair, others go through set_config_param()
+ * without a console.
+ */
+static void map_config(ConfSection *sect, char *key, char *val, bool reload)
+{
+	if (!sect)
+		return;
+
+	if (!sect->data_fn) {
+		set_config_param(sect->elem_list, key, val, reload, NULL);
+		return;
+	}
+
+	sect->data_fn(key, val);
+}
+
+/* Render the current value of a parameter via its own getter. */
+const char *conf_to_text(ConfElem *elem)
+{
+	const char *text = elem->io.fn_get(elem);
+
+	return text;
+}
+
+/*
+ * Find section by name (case-insensitive) in a list terminated by
+ * an entry with NULL name.  Logs a warning and returns NULL if absent.
+ */
+static ConfSection *find_section(ConfSection *sect, const char *name)
+{
+	ConfSection *cur = sect;
+
+	while (cur->name) {
+		if (strcasecmp(cur->name, name) == 0)
+			return cur;
+		cur++;
+	}
+
+	log_warning("unknown section in config: %s", name);
+	return NULL;
+}
+
+/*
+ * Load an INI-style file and feed its contents to sect_list.
+ *
+ * fn        - path handed to load_file()
+ * sect_list - known sections, terminated by an entry with NULL name
+ * reload    - passed through to the setters; also decides what happens
+ *             when the file cannot be loaded: exit(1) on initial load,
+ *             silent return on reload
+ *
+ * Syntax: '#' and ';' start comment lines, "[name]" switches section,
+ * everything else must be "key = value" with the key made of
+ * alphanumerics and '_'.  The first line that is none of these stops
+ * parsing with an error message; pairs seen before it stay applied.
+ *
+ * The buffer is modified in place to terminate keys and values.
+ */
+void iniparser(const char *fn, ConfSection *sect_list, bool reload)
+{
+	char *buf;
+	char *p, *key, *val;
+	int klen, vlen;
+	ConfSection *cur_section = NULL;
+
+	buf = load_file(fn);
+	if (buf == NULL) {
+		if (!reload)
+			exit(1);
+		else
+			return;
+	}
+
+	/*
+	 * All <ctype.h> calls below cast to unsigned char: passing a
+	 * negative plain char (any byte >= 0x80 where char is signed)
+	 * is undefined behaviour.
+	 */
+	p = buf;
+	while (*p) {
+		/* space at the start of line - including empty lines */
+		while (*p && isspace((unsigned char)*p)) p++;
+
+		/* skip comment lines */
+		if (*p == '#' || *p == ';') {
+			while (*p && *p != '\n') p++;
+			continue;
+		}
+		/* got new section */
+		if (*p == '[') {
+			key = ++p;
+			while (*p && *p != ']' && *p != '\n') p++;
+			if (*p != ']') {
+				log_warning("bad section header");
+				/* ignore pairs until next valid header */
+				cur_section = NULL;
+				continue;
+			}
+			*p++ = 0;
+
+			cur_section = find_section(sect_list, key);
+			continue;
+		}
+
+		/* done? */
+		if (*p == 0) break;
+
+		/* read key val */
+		key = p;
+		while (*p && (isalnum((unsigned char)*p) || *p == '_')) p++;
+		klen = p - key;
+
+		/* expect '=', skip it */
+		while (*p && (*p == ' ' || *p == '\t')) p++;
+		if (*p != '=') {
+			log_error("syntax error in config, stopping loading");
+			break;
+		} else
+			p++;
+		while (*p && (*p == ' ' || *p == '\t')) p++;
+
+		/* now read value */
+		val = p;
+		while (*p && (*p != '\n'))
+			p++;
+		vlen = p - val;
+		/* eat space at end */
+		while (vlen > 0 && isspace((unsigned char)val[vlen - 1]))
+			vlen--;
+
+		/* skip junk */
+		while (*p && isspace((unsigned char)*p)) p++;
+
+		/* our buf is r/w, so take it easy */
+		key[klen] = 0;
+		val[vlen] = 0;
+		map_config(cur_section, key, val, reload);
+	}
+
+	free(buf);
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* configuration parsing */
+
+/* initializers for ConfElem.io: {getter, setter} of the built-in value types */
+#define CF_INT {cf_get_int, cf_set_int}
+#define CF_STR {cf_get_str, cf_set_str}
+#define CF_TIME {cf_get_time, cf_set_time}
+
+#define CF_SECT_VARS 1 /* section contains pre-defined key-var pairs */
+#define CF_SECT_DATA 2 /* key-val pairs are data */
+
+typedef struct ConfElem ConfElem;
+
+/* callback for CF_SECT_DATA loading */
+typedef void (*conf_data_callback_fn)(char *key, char *value);
+
+/* getter returns the textual form of the value behind elem */
+typedef const char * (*conf_var_get_fn)(ConfElem *elem);
+/* setter parses value into elem, returns false if it was not accepted */
+typedef bool (*conf_var_set_fn)(ConfElem *elem, const char *value, PgSocket *console);
+
+/* getter/setter pair attached to one parameter */
+typedef struct {
+ conf_var_get_fn fn_get;
+ conf_var_set_fn fn_set;
+} ConfAccess;
+
+/* description of one configuration parameter */
+struct ConfElem {
+ const char *name; /* key, matched case-insensitively; NULL ends a list */
+ bool reloadable; /* false: set_config_param() refuses it on reload */
+ ConfAccess io; /* how to read and write the value */
+ void *dst; /* storage handed to the io functions */
+ bool allocated; /* set by cf_set_str() once *dst holds a strdup()ed string */
+};
+
+/* one section of the config file */
+typedef struct ConfSection {
+ const char *name; /* section name, matched case-insensitively; NULL ends a list */
+ ConfElem *elem_list; /* parameters, used when data_fn is NULL */
+ conf_data_callback_fn data_fn; /* if set, receives every key/value of the section */
+} ConfSection;
+
+/* parse config file fn, dispatching key/value pairs to sect_list */
+void iniparser(const char *fn, ConfSection *sect_list, bool reload);
+
+/* integer parameters */
+const char * cf_get_int(ConfElem *elem);
+bool cf_set_int(ConfElem *elem, const char *value, PgSocket *console);
+
+/* time parameters, stored as usec_t */
+const char * cf_get_time(ConfElem *elem);
+bool cf_set_time(ConfElem *elem, const char *value, PgSocket *console);
+
+/* string parameters */
+const char *cf_get_str(ConfElem *elem);
+bool cf_set_str(ConfElem *elem, const char *value, PgSocket *console);
+
+/* generic access by descriptor or by name */
+const char *conf_to_text(ConfElem *elem);
+bool set_config_param(ConfElem *elem_list, const char *key, const char *val, bool reload, PgSocket *console);
+
+/* connstring parsing */
+void parse_database(char *name, char *connstr);
+
+/* user file parsing */
+bool load_auth_file(const char *fn);
+bool loader_users_check(void);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Launcher for all the rest.
+ */
+
+#include "bouncer.h"
+
+#include <signal.h>
+#include <getopt.h>
+
+/* custom accessors for pool_mode and auth_type, referenced by bouncer_params[] */
+static bool set_mode(ConfElem *elem, const char *val, PgSocket *console);
+static const char *get_mode(ConfElem *elem);
+static bool set_auth(ConfElem *elem, const char *val, PgSocket *console);
+static const char *get_auth(ConfElem *elem);
+
+static const char *usage_str =
+"usage: pgbouncer [-d] [-v] [-h|-V] config.ini\n";
+
+/* Print the usage line to stdout and exit with status err. */
+static void usage(int err)
+{
+	/* a non-literal string must never be used as the format itself */
+	printf("%s", usage_str);
+	exit(err);
+}
+
+/*
+ * configuration storage
+ */
+
+/* run-time state and command line switches */
+int cf_verbose = 0; /* incremented by each -v */
+int cf_daemon = 0; /* set by -d */
+int cf_pause_mode = 0; /* 0 - active, 1 - paused, 2 - suspended (see handle_sigusr2) */
+int cf_shutdown = 0; /* set by SIGINT handler */
+int cf_reboot = 0; /* set by -R */
+static char *cf_config_file;
+
+/*
+ * String defaults below are literals; cf_set_str() frees an old
+ * value only after it has allocated one itself.
+ */
+char *cf_listen_addr = NULL;
+int cf_listen_port = 6000;
+char *cf_unix_socket_dir = "/tmp";
+
+int cf_pool_mode = POOL_SESSION;
+
+/* sbuf config */
+int cf_sbuf_len = 2048;
+int cf_tcp_socket_buffer = 0;
+/* default depends on whether the platform defines TCP_DEFER_ACCEPT */
+#ifdef TCP_DEFER_ACCEPT
+int cf_tcp_defer_accept = 45;
+#else
+int cf_tcp_defer_accept = 0;
+#endif
+int cf_tcp_keepalive = 0;
+int cf_tcp_keepcnt = 0;
+int cf_tcp_keepidle = 0;
+int cf_tcp_keepintvl = 0;
+
+int cf_auth_type = AUTH_MD5;
+char *cf_auth_file = "unconfigured_file";
+
+int cf_max_client_conn = 20;
+int cf_default_pool_size = 10;
+
+char *cf_server_check_query = "select 1";
+usec_t cf_server_check_delay = 30 * USEC;
+
+/* timeouts are kept as usec_t multiples of USEC */
+usec_t cf_server_lifetime = 60*60*USEC;
+usec_t cf_server_idle_timeout = 10*60*USEC;
+usec_t cf_server_connect_timeout = 15*USEC;
+usec_t cf_server_login_retry = 15*USEC;
+usec_t cf_query_timeout = 0*USEC;
+usec_t cf_client_idle_timeout = 0*USEC;
+
+char *cf_logfile = NULL;
+char *cf_pidfile = NULL; /* required for daemon mode, see go_daemon() */
+static char *cf_jobname = NULL;
+
+char *cf_admin_users = "";
+char *cf_stats_users = "";
+int cf_stats_period = 60;
+
+
+/*
+ * config file description
+ */
+/*
+ * Parameters of the [pgbouncer] section.
+ *
+ * Columns follow struct ConfElem: name, reloadable, accessors, storage.
+ * auth_type and pool_mode use custom accessors that work on their
+ * globals directly, so they carry no storage pointer.
+ */
+ConfElem bouncer_params[] = {
+{"job_name", true, CF_STR, &cf_jobname},
+{"conffile", true, CF_STR, &cf_config_file},
+{"logfile", true, CF_STR, &cf_logfile},
+{"pidfile", false, CF_STR, &cf_pidfile},
+{"listen_addr", false, CF_STR, &cf_listen_addr},
+{"listen_port", false, CF_INT, &cf_listen_port},
+{"unix_socket_dir", false, CF_STR, &cf_unix_socket_dir},
+{"auth_type", true, {get_auth, set_auth}},
+{"auth_file", true, CF_STR, &cf_auth_file},
+{"pool_mode", true, {get_mode, set_mode}},
+{"max_client_conn", true, CF_INT, &cf_max_client_conn},
+{"default_pool_size", true, CF_INT, &cf_default_pool_size},
+
+{"server_check_query", true, CF_STR, &cf_server_check_query},
+{"server_check_delay", true, CF_TIME, &cf_server_check_delay},
+{"query_timeout", true, CF_TIME, &cf_query_timeout},
+{"client_idle_timeout", true, CF_TIME, &cf_client_idle_timeout},
+{"server_lifetime", true, CF_TIME, &cf_server_lifetime},
+{"server_idle_timeout", true, CF_TIME, &cf_server_idle_timeout},
+{"server_connect_timeout",true, CF_TIME, &cf_server_connect_timeout},
+{"server_login_retry", true, CF_TIME, &cf_server_login_retry},
+
+{"pkt_buf", false, CF_INT, &cf_sbuf_len},
+{"tcp_defer_accept", false, CF_INT, &cf_tcp_defer_accept},
+{"tcp_socket_buffer", true, CF_INT, &cf_tcp_socket_buffer},
+{"tcp_keepalive", true, CF_INT, &cf_tcp_keepalive},
+{"tcp_keepcnt", true, CF_INT, &cf_tcp_keepcnt},
+{"tcp_keepidle", true, CF_INT, &cf_tcp_keepidle},
+{"tcp_keepintvl", true, CF_INT, &cf_tcp_keepintvl},
+{"verbose", true, CF_INT, &cf_verbose},
+{"admin_users", true, CF_STR, &cf_admin_users},
+{"stats_users", true, CF_STR, &cf_stats_users},
+{"stats_period", true, CF_INT, &cf_stats_period},
+/* terminator: lookups stop at the entry with NULL name */
+{NULL},
+};
+
+/*
+ * Sections of the config file: [pgbouncer] holds the parameters
+ * above, every key/value in [databases] goes to parse_database().
+ */
+static ConfSection bouncer_config [] = {
+{"pgbouncer", bouncer_params, NULL},
+{"databases", NULL, parse_database},
+{NULL}
+};
+
+/* Getter for pool_mode: name of the current cf_pool_mode value. */
+static const char *get_mode(ConfElem *elem)
+{
+	if (cf_pool_mode == POOL_STMT)
+		return "statement";
+	if (cf_pool_mode == POOL_TX)
+		return "transaction";
+	if (cf_pool_mode == POOL_SESSION)
+		return "session";
+
+	/* none of the known modes */
+	fatal("borken mode? should not happen");
+	return NULL;
+}
+
+/*
+ * Setter for pool_mode: accepts "session", "transaction" or
+ * "statement" in any letter case.  Anything else is reported through
+ * admin_error() and leaves cf_pool_mode untouched.
+ */
+static bool set_mode(ConfElem *elem, const char *val, PgSocket *console)
+{
+	int mode;
+
+	if (strcasecmp(val, "session") == 0) {
+		mode = POOL_SESSION;
+	} else if (strcasecmp(val, "transaction") == 0) {
+		mode = POOL_TX;
+	} else if (strcasecmp(val, "statement") == 0) {
+		mode = POOL_STMT;
+	} else {
+		admin_error(console, "bad mode: %s", val);
+		return false;
+	}
+
+	cf_pool_mode = mode;
+	return true;
+}
+
+/* Getter for auth_type: name of the current cf_auth_type value. */
+static const char *get_auth(ConfElem *elem)
+{
+	const char *name = NULL;
+
+	switch (cf_auth_type) {
+	case AUTH_ANY:
+		name = "any";
+		break;
+	case AUTH_TRUST:
+		name = "trust";
+		break;
+	case AUTH_PLAIN:
+		name = "plain";
+		break;
+	case AUTH_CRYPT:
+		name = "crypt";
+		break;
+	case AUTH_MD5:
+		name = "md5";
+		break;
+	default:
+		/* none of the known types, name stays NULL */
+		fatal("borken auth? should not happen");
+		break;
+	}
+	return name;
+}
+
+/*
+ * Setter for auth_type: maps a case-insensitive name onto the
+ * AUTH_* constant.  Unknown names are reported through admin_error()
+ * and leave cf_auth_type untouched.
+ */
+static bool set_auth(ConfElem *elem, const char *val, PgSocket *console)
+{
+	static const struct {
+		const char *name;
+		int type;
+	} auth_names[] = {
+		{ "any", AUTH_ANY },
+		{ "trust", AUTH_TRUST },
+		{ "plain", AUTH_PLAIN },
+		{ "crypt", AUTH_CRYPT },
+		{ "md5", AUTH_MD5 },
+	};
+	unsigned i;
+
+	for (i = 0; i < sizeof(auth_names) / sizeof(auth_names[0]); i++) {
+		if (strcasecmp(val, auth_names[i].name) == 0) {
+			cf_auth_type = auth_names[i].type;
+			return true;
+		}
+	}
+
+	admin_error(console, "bad auth type: %s", val);
+	return false;
+}
+
+/* config loading, tries to be tolerant to errors */
+void load_config(bool reload)
+{
+	bool need_users;
+
+	/* parse the ini file first, it may change auth settings */
+	iniparser(cf_config_file, bouncer_config, reload);
+
+	/* user list is wanted for AUTH_TRUST and everything above it */
+	need_users = (cf_auth_type >= AUTH_TRUST);
+	if (need_users)
+		load_auth_file(cf_auth_file);
+
+	/* reset pool_size */
+	config_postprocess();
+}
+
+/*
+ * signal handling.
+ *
+ * handle_* functions are not actual signal handlers but called from
+ * event_loop() so they have no restrictions what they can do.
+ */
+/* one event object per handled signal, registered in signal_setup() */
+static struct event ev_sigterm;
+static struct event ev_sigint;
+static struct event ev_sigusr1;
+static struct event ev_sigusr2;
+static struct event ev_sighup;
+
+/* SIGTERM: leave immediately, without waiting for anything. */
+static void handle_sigterm(int sock, short flags, void *arg)
+{
+	const int exit_code = 1;
+
+	log_info("Got SIGTERM, fast exit");
+
+	/* pidfile gets removed by the atexit() hook */
+	exit(exit_code);
+}
+
+/* SIGINT: request shutdown and switch to pause mode. */
+static void handle_sigint(int sock, short flags, void *arg)
+{
+	log_info("Got SIGINT, shutting down");
+
+	/* both flags are only set here, the order does not matter */
+	cf_shutdown = 1;
+	cf_pause_mode = 1;
+}
+
+/* SIGUSR1: enter pause mode unless already paused or suspended. */
+static void handle_sigusr1(int sock, short flags, void *arg)
+{
+	if (cf_pause_mode != 0) {
+		log_info("Got SIGUSR1, but already paused/suspended");
+		return;
+	}
+
+	log_info("Got SIGUSR1, pausing all activity");
+	cf_pause_mode = 1;
+}
+
+/*
+ * SIGUSR2: resume normal operation.
+ *
+ * From suspend (cf_pause_mode == 2) resume_all() is called first;
+ * from pause (1) clearing the flag is enough.  When not paused at
+ * all, the signal is only logged.
+ */
+static void handle_sigusr2(int sock, short flags, void *arg)
+{
+	switch (cf_pause_mode) {
+	case 2:
+		log_info("Got SIGUSR2, continuing from SUSPEND");
+		resume_all();
+		cf_pause_mode = 0;
+		break;
+	case 1:
+		log_info("Got SIGUSR2, continuing from PAUSE");
+		cf_pause_mode = 0;
+		break;
+	case 0:
+		/* message used to name SIGUSR1 by mistake */
+		log_info("Got SIGUSR2, but not paused/suspended");
+		break;
+	}
+}
+
+/* SIGHUP: read the config file again, in reload mode. */
+static void handle_sighup(int sock, short flags, void *arg)
+{
+	const bool reload = true;
+
+	log_info("Got SIGHUP re-reading config");
+	load_config(reload);
+}
+
+/*
+ * Block SIGPIPE and register the event-loop handlers for
+ * SIGTERM, SIGINT, SIGUSR1, SIGUSR2 and SIGHUP, in that order.
+ */
+static void signal_setup(void)
+{
+	static const struct {
+		struct event *ev;
+		int signo;
+		void (*handler)(int, short, void *);
+	} sigtab[] = {
+		{ &ev_sigterm, SIGTERM, handle_sigterm },
+		{ &ev_sigint, SIGINT, handle_sigint },
+		{ &ev_sigusr1, SIGUSR1, handle_sigusr1 },
+		{ &ev_sigusr2, SIGUSR2, handle_sigusr2 },
+		{ &ev_sighup, SIGHUP, handle_sighup },
+	};
+	sigset_t blocked;
+	unsigned i;
+
+	/* block SIGPIPE */
+	sigemptyset(&blocked);
+	sigaddset(&blocked, SIGPIPE);
+	if (sigprocmask(SIG_BLOCK, &blocked, NULL) < 0)
+		fatal_perror("sigprocmask");
+
+	/* install handlers */
+	for (i = 0; i < sizeof(sigtab) / sizeof(sigtab[0]); i++) {
+		signal_set(sigtab[i].ev, sigtab[i].signo, sigtab[i].handler, NULL);
+		signal_add(sigtab[i].ev, NULL);
+	}
+}
+
+/*
+ * daemon mode
+ */
+/* fork; the parent leaves via _exit(0), only the child returns */
+static void fork_keep_child(void)
+{
+	int pid = fork();
+
+	if (pid < 0)
+		fatal_perror("fork");
+	if (pid > 0)
+		_exit(0);
+}
+
+/*
+ * Detach from the caller: drop inherited descriptors, point
+ * stdin/stdout/stderr at /dev/null, then fork, setsid, fork.
+ * Needs a configured pidfile.
+ */
+static void go_daemon(void)
+{
+	int devnull, fd;
+
+	if (!cf_pidfile)
+		fatal("daemon needs pidfile configured");
+
+	/* just in case close all files */
+	for (fd = 3; fd < OPEN_MAX; fd++)
+		close(fd);
+
+	/* send stdin, stdout, stderr to /dev/null */
+	devnull = open("/dev/null", O_RDWR);
+	if (devnull < 0)
+		fatal_perror("/dev/null");
+	for (fd = 0; fd <= 2; fd++)
+		dup2(devnull, fd);
+	if (devnull > 2)
+		close(devnull);
+
+	/* first fork, so that setsid() is not called by a group leader */
+	fork_keep_child();
+
+	/* create new session */
+	if (setsid() < 0)
+		fatal_perror("setsid");
+
+	/* fork again to avoid being session leader */
+	fork_keep_child();
+}
+
+/*
+ * pidfile handling. if the pidfile already exists, quit with error.
+ */
+/* Refuse to continue if the configured pidfile can already be stat()ed. */
+static void check_pidfile(void)
+{
+	struct stat st;
+
+	if (cf_pidfile && stat(cf_pidfile, &st) >= 0)
+		fatal("pidfile exists, another instance running?");
+}
+
+/* atexit() hook: unlink the pidfile, if one is configured. */
+static void remove_pidfile(void)
+{
+	if (cf_pidfile)
+		unlink(cf_pidfile);
+}
+
+/*
+ * Create the pidfile holding our pid as decimal text.
+ *
+ * The file is opened with O_EXCL, so an existing file is a fatal
+ * error.  Does nothing when no pidfile is configured.
+ */
+static void write_pidfile(void)
+{
+	char pidtext[64];
+	int fd, len;
+
+	if (!cf_pidfile)
+		return;
+
+	/* sprintf() returns the number of characters written */
+	len = sprintf(pidtext, "%u", (unsigned)getpid());
+
+	fd = open(cf_pidfile, O_WRONLY | O_CREAT | O_EXCL, 0644);
+	if (fd < 0)
+		fatal_perror(cf_pidfile);
+	if (safe_write(fd, pidtext, len) < 0)
+		fatal_perror(cf_pidfile);
+	safe_close(fd);
+
+	/* only remove when we have it actually written */
+	atexit(remove_pidfile);
+}
+
+/*
+ * Pidfile check, optional daemonization, pidfile creation.
+ * Both pidfile steps are skipped when started with -R.
+ */
+static void daemon_setup(void)
+{
+	const bool fresh_start = !cf_reboot;
+
+	if (fresh_start)
+		check_pidfile();
+
+	if (cf_daemon)
+		go_daemon();
+
+	/* written after the forks so that it holds the final pid */
+	if (fresh_start)
+		write_pidfile();
+}
+
+/*
+ * One iteration of the main loop: reset the time cache, run a single
+ * pass of the event loop, then do per-loop object maintenance.
+ */
+static void main_loop_once(void)
+{
+ reset_time_cache();
+ event_loop(EVLOOP_ONCE);
+ per_loop_object_maint();
+}
+
+/* boot everything */
+/*
+ * Program entry: parse the command line, load the config, set up
+ * the subsystems and run the main loop forever.
+ *
+ * Options: -d daemonize, -v more verbose (repeatable), -V print
+ * version, -h print usage, -R start in reboot/takeover mode.
+ * Exactly one non-option argument, the config file, is required.
+ */
+int main(int argc, char *argv[])
+{
+	int c;
+
+	/* parse cmdline; getopt() marks the end of options with -1, not EOF */
+	while ((c = getopt(argc, argv, "vhdVR")) != -1) {
+		switch (c) {
+		case 'R':
+			cf_reboot = 1;
+			break;
+		case 'v':
+			cf_verbose++;
+			break;
+		case 'V':
+			printf("%s version %s\n", PACKAGE_NAME, PACKAGE_VERSION);
+			return 0;
+		case 'd':
+			cf_daemon = 1;
+			break;
+		case 'h':
+			/* help was asked for, that is not a failure */
+			usage(0);
+			break;
+		default:
+			usage(1);
+		}
+	}
+	if (optind + 1 != argc)
+		usage(1);
+	cf_config_file = argv[optind];
+	load_config(false);
+
+	/* init random */
+	srandom(time(NULL) ^ getpid());
+
+	/* initialize subsystems, order important */
+	daemon_setup();
+	event_init();
+	signal_setup();
+	janitor_setup();
+	stats_setup();
+	admin_setup();
+
+	if (cf_reboot) {
+		/* loop until cf_reboot is cleared, then claim the pidfile */
+		takeover_init();
+		while (cf_reboot)
+			main_loop_once();
+		write_pidfile();
+	} else
+		pooler_setup();
+
+	/* main loop */
+	while (1)
+		main_loop_once();
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Safe and easy access to fixed memory buffer
+ */
+
+/* read cursor over a memory area the MBuf does not modify */
+typedef struct MBuf MBuf;
+struct MBuf {
+ const uint8 *data; /* first byte of the area */
+ const uint8 *end; /* one past the last byte */
+ const uint8 *pos; /* next byte to be read */
+};
+
+/*
+ * Point buf at the len bytes starting at ptr, cursor at the start.
+ * A negative len is a fatal error.
+ */
+static inline void mbuf_init(MBuf *buf, const uint8 *ptr, int len)
+{
+	/* name the actual problem so that the log line is useful */
+	if (len < 0)
+		fatal("mbuf_init: negative length");
+	buf->data = buf->pos = ptr;
+	buf->end = ptr + len;
+}
+
+static inline uint8 mbuf_get_char(MBuf *buf)
+{
+ if (buf->pos + 1 > buf->end)
+ fatal("buffer overflow");
+ return *buf->pos++;
+}
+
+/*
+ * Read a 16-bit value, most significant byte first, and advance.
+ * Fatal error if fewer than 2 bytes are left.
+ */
+static inline unsigned mbuf_get_uint16(MBuf *buf)
+{
+	unsigned val;
+
+	/* remaining-length check, as in mbuf_get_bytes(); avoids pos + 2 past the end */
+	if (buf->end - buf->pos < 2)
+		fatal("buffer overflow");
+	val = *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	return val;
+}
+
+/*
+ * Read a 32-bit value, most significant byte first, and advance.
+ * Fatal error if fewer than 4 bytes are left.
+ */
+static inline unsigned mbuf_get_uint32(MBuf *buf)
+{
+	unsigned val;
+
+	/* remaining-length check, as in mbuf_get_bytes(); avoids pos + 4 past the end */
+	if (buf->end - buf->pos < 4)
+		fatal("buffer overflow");
+	val = *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	val = (val << 8) | *buf->pos++;
+	return val;
+}
+
+/*
+ * Read a 64-bit value as two 32-bit halves, high half first.
+ *
+ * Returns uint64: the former 'unsigned' return type silently
+ * dropped the upper half wherever unsigned is 32 bits wide.
+ */
+static inline uint64 mbuf_get_uint64(MBuf *buf)
+{
+	uint64 i1, i2;
+
+	i1 = mbuf_get_uint32(buf);
+	i2 = mbuf_get_uint32(buf);
+	return (i1 << 32) | i2;
+}
+
+/*
+ * Hand out a pointer to the next len bytes and step over them.
+ * Fatal error if fewer than len bytes are left.
+ */
+static inline const uint8 * mbuf_get_bytes(MBuf *buf, unsigned len)
+{
+	const uint8 *start;
+
+	if (len > buf->end - buf->pos)
+		fatal("buffer overflow");
+
+	start = buf->pos;
+	buf->pos = start + len;
+	return start;
+}
+
+/*
+ * Hand out the zero-terminated string at the cursor and step over
+ * it, terminator included.  Returns NULL when no terminator is found
+ * before the end; the cursor is then left at the end.
+ */
+static inline const char * mbuf_get_string(MBuf *buf)
+{
+	const uint8 *cur = buf->pos;
+	const char *str = (const char *)cur;
+
+	/* scan for the terminator */
+	while (cur < buf->end && *cur != 0)
+		cur++;
+	buf->pos = cur;
+
+	if (cur == buf->end)
+		return NULL;
+
+	/* skip the terminator as well */
+	buf->pos = cur + 1;
+	return str;
+}
+
+/* Number of bytes between the cursor and the end. */
+static inline unsigned mbuf_avail(MBuf *buf)
+{
+	const uint8 *limit = buf->end;
+
+	return limit - buf->pos;
+}
+
+/* Total number of bytes covered, independent of the cursor. */
+static inline unsigned mbuf_size(MBuf *buf)
+{
+	const uint8 *limit = buf->end;
+
+	return limit - buf->data;
+}
+
--- /dev/null
+/* $KAME: md5.c,v 1.3 2000/02/22 14:01:17 itojun Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $PostgreSQL: pgsql/contrib/pgcrypto/md5.c,v 1.13 2005/07/11 15:07:59 tgl Exp $
+ */
+
+#include "bouncer.h"
+
+#include <sys/param.h>
+
+#include "md5.h"
+
+/* sanity check */
+#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN && BYTE_ORDER != BIG_ENDIAN)
+#error Define BYTE_ORDER to be equal to either LITTLE_ENDIAN or BIG_ENDIAN
+#endif
+
+/* rotate the 32-bit value X left by s bits */
+#define SHIFT(X, s) (((X) << (s)) | ((X) >> (32 - (s))))
+
+/*
+ * The four auxiliary functions of MD5.  Every use of an argument is
+ * parenthesized, including under '~', so that expression arguments
+ * expand as intended.
+ */
+#define F(X, Y, Z) (((X) & (Y)) | (~(X) & (Z)))
+#define G(X, Y, Z) (((X) & (Z)) | ((Y) & ~(Z)))
+#define H(X, Y, Z) ((X) ^ (Y) ^ (Z))
+#define I(X, Y, Z) ((Y) ^ ((X) | ~(Z)))
+
+/*
+ * One step of round 1..4.  All four do the same thing and differ only
+ * in the auxiliary function (F, G, H, I):
+ *   a = b + rotl(a + fn(b, c, d) + X[k] + T[i], s)
+ * X and T must be visible at the point of use.
+ */
+#define ROUND1(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + F((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+#define ROUND2(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + G((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+#define ROUND3(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + H((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+#define ROUND4(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + I((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+/* rotation amounts used with ROUND1 */
+#define Sa 7
+#define Sb 12
+#define Sc 17
+#define Sd 22
+
+/* rotation amounts used with ROUND2 */
+#define Se 5
+#define Sf 9
+#define Sg 14
+#define Sh 20
+
+/* rotation amounts used with ROUND3 */
+#define Si 4
+#define Sj 11
+#define Sk 16
+#define Sl 23
+
+/* rotation amounts used with ROUND4 */
+#define Sm 6
+#define Sn 10
+#define So 15
+#define Sp 21
+
+/* initial values of the four state words, loaded by md5_init() */
+#define MD5_A0 0x67452301
+#define MD5_B0 0xefcdab89
+#define MD5_C0 0x98badcfe
+#define MD5_D0 0x10325476
+
+/*
+ * Integer part of 4294967296 times abs(sin(i)), where i is in radians.
+ * The table is indexed 1..64 by the ROUND macros; element 0 is filler.
+ */
+static const uint32 T[65] = {
+ 0,
+ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+ 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+ 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+
+ 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+ 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8,
+ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+ 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+
+ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+ 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
+ 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+
+ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+ 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+ 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+/* padding source for md5_pad(): a single 0x80 byte followed by zeros */
+static const uint8 md5_paddat[MD5_BUFLEN] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* processes one MD5_BUFLEN-byte block, defined at the end of the file */
+static void md5_calc(uint8 *, md5_ctxt *);
+
+/* Bring the context to its start state: no input seen, empty block buffer. */
+void
+md5_init(md5_ctxt * ctxt)
+{
+	/* wipe the block buffer */
+	memset(ctxt->md5_buf, 0, sizeof(ctxt->md5_buf));
+
+	/* load the initial chaining values */
+	ctxt->md5_sta = MD5_A0;
+	ctxt->md5_stb = MD5_B0;
+	ctxt->md5_stc = MD5_C0;
+	ctxt->md5_std = MD5_D0;
+
+	/* nothing buffered, nothing counted */
+	ctxt->md5_i = 0;
+	ctxt->md5_n = 0;
+}
+
+/*
+ * Feed len bytes of input into the hash.
+ *
+ * Input is gathered in ctxt->md5_buf; each time MD5_BUFLEN bytes are
+ * complete they are processed by md5_calc().  Whole blocks in the
+ * middle of the input are processed in place, the rest is kept in
+ * the buffer for the next call or for md5_pad().
+ */
+void
+md5_loop(md5_ctxt * ctxt, const uint8 *input, unsigned len)
+{
+	unsigned int gap,
+				i;
+
+	/*
+	 * byte to bit.  Widen before multiplying: 'len * 8' alone is
+	 * computed in unsigned int and wraps for large inputs before
+	 * it ever reaches the 64-bit counter.
+	 */
+	ctxt->md5_n += (uint64) len * 8;
+	gap = MD5_BUFLEN - ctxt->md5_i;
+
+	if (len >= gap)
+	{
+		/* complete the buffered block and process it */
+		memmove(ctxt->md5_buf + ctxt->md5_i, input, gap);
+		md5_calc(ctxt->md5_buf, ctxt);
+
+		/* process following whole blocks directly from input */
+		for (i = gap; i + MD5_BUFLEN <= len; i += MD5_BUFLEN)
+			md5_calc((uint8 *) (input + i), ctxt);
+
+		/* keep the tail for later */
+		ctxt->md5_i = len - i;
+		memmove(ctxt->md5_buf, input + i, ctxt->md5_i);
+	}
+	else
+	{
+		/* not enough for a block yet, just buffer it */
+		memmove(ctxt->md5_buf + ctxt->md5_i, input, len);
+		ctxt->md5_i += len;
+	}
+}
+
+/*
+ * Finish the message: append the 0x80/zero padding and the 64-bit
+ * bit count (least significant byte first), then process the final
+ * block.  An extra block is used when the count does not fit after
+ * the padding byte.
+ */
+void
+md5_pad(md5_ctxt * ctxt)
+{
+	const size_t cntlen = sizeof(ctxt->md5_n);
+	uint8	   *tail = ctxt->md5_buf + ctxt->md5_i;
+	unsigned int gap;
+#if BYTE_ORDER == BIG_ENDIAN
+	unsigned int i;
+#endif
+
+	/* Don't count up padding. Keep md5_n. */
+	gap = MD5_BUFLEN - ctxt->md5_i;
+	if (gap <= 8)
+	{
+		/* including gap == 8: fill this block, pad on in the next one */
+		memmove(tail, md5_paddat, gap);
+		md5_calc(ctxt->md5_buf, ctxt);
+		memmove(ctxt->md5_buf, md5_paddat + gap,
+				MD5_BUFLEN - cntlen);
+	}
+	else
+	{
+		/* padding and count both fit into the current block */
+		memmove(tail, md5_paddat, gap - cntlen);
+	}
+
+	/* 8 byte word */
+#if BYTE_ORDER == LITTLE_ENDIAN
+	memmove(&ctxt->md5_buf[56], &ctxt->md5_n8[0], 8);
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+	/* store the counter bytes in reverse order */
+	for (i = 0; i < 8; i++)
+		ctxt->md5_buf[56 + i] = ctxt->md5_n8[7 - i];
+#endif
+
+	md5_calc(ctxt->md5_buf, ctxt);
+}
+
+/*
+ * Copy the 16-byte digest out of the context.  Each 32-bit state
+ * word is emitted least significant byte first.
+ */
+void
+md5_result(uint8 *digest, md5_ctxt * ctxt)
+{
+	/* 4 byte words */
+#if BYTE_ORDER == BIG_ENDIAN
+	unsigned int i;
+
+	/* i ^ 3 mirrors the byte index inside its 4-byte word */
+	for (i = 0; i < 16; i++)
+		digest[i] = ctxt->md5_st8[i ^ 3];
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+	/* memory layout already matches */
+	memmove(digest, &ctxt->md5_st8[0], 16);
+#endif
+}
+
+#if BYTE_ORDER == BIG_ENDIAN
+/* file-scope scratch area for the byte-swapped input block */
+static uint32 X[16];
+#endif
+
+/*
+ * Process one MD5_BUFLEN-byte block at b64 and add the result to
+ * the state words in ctxt.
+ *
+ * The ROUND macros read the block as sixteen 32-bit words named X.
+ * On little-endian hosts X aliases the input directly; on big-endian
+ * hosts the bytes of every word are reversed into the static X first.
+ */
+static void
+md5_calc(uint8 *b64, md5_ctxt * ctxt)
+{
+	uint32		A = ctxt->md5_sta;
+	uint32		B = ctxt->md5_stb;
+	uint32		C = ctxt->md5_stc;
+	uint32		D = ctxt->md5_std;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+	uint32	   *X = (uint32 *) b64;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+	/* 4 byte words: reverse the bytes of each one */
+	uint8	   *y = (uint8 *) X;
+	unsigned int w;
+
+	for (w = 0; w < MD5_BUFLEN; w += 4)
+	{
+		y[w] = b64[w + 3];
+		y[w + 1] = b64[w + 2];
+		y[w + 2] = b64[w + 1];
+		y[w + 3] = b64[w];
+	}
+#endif
+
+	/* round 1 */
+	ROUND1(A, B, C, D, 0, Sa, 1);
+	ROUND1(D, A, B, C, 1, Sb, 2);
+	ROUND1(C, D, A, B, 2, Sc, 3);
+	ROUND1(B, C, D, A, 3, Sd, 4);
+	ROUND1(A, B, C, D, 4, Sa, 5);
+	ROUND1(D, A, B, C, 5, Sb, 6);
+	ROUND1(C, D, A, B, 6, Sc, 7);
+	ROUND1(B, C, D, A, 7, Sd, 8);
+	ROUND1(A, B, C, D, 8, Sa, 9);
+	ROUND1(D, A, B, C, 9, Sb, 10);
+	ROUND1(C, D, A, B, 10, Sc, 11);
+	ROUND1(B, C, D, A, 11, Sd, 12);
+	ROUND1(A, B, C, D, 12, Sa, 13);
+	ROUND1(D, A, B, C, 13, Sb, 14);
+	ROUND1(C, D, A, B, 14, Sc, 15);
+	ROUND1(B, C, D, A, 15, Sd, 16);
+
+	/* round 2 */
+	ROUND2(A, B, C, D, 1, Se, 17);
+	ROUND2(D, A, B, C, 6, Sf, 18);
+	ROUND2(C, D, A, B, 11, Sg, 19);
+	ROUND2(B, C, D, A, 0, Sh, 20);
+	ROUND2(A, B, C, D, 5, Se, 21);
+	ROUND2(D, A, B, C, 10, Sf, 22);
+	ROUND2(C, D, A, B, 15, Sg, 23);
+	ROUND2(B, C, D, A, 4, Sh, 24);
+	ROUND2(A, B, C, D, 9, Se, 25);
+	ROUND2(D, A, B, C, 14, Sf, 26);
+	ROUND2(C, D, A, B, 3, Sg, 27);
+	ROUND2(B, C, D, A, 8, Sh, 28);
+	ROUND2(A, B, C, D, 13, Se, 29);
+	ROUND2(D, A, B, C, 2, Sf, 30);
+	ROUND2(C, D, A, B, 7, Sg, 31);
+	ROUND2(B, C, D, A, 12, Sh, 32);
+
+	/* round 3 */
+	ROUND3(A, B, C, D, 5, Si, 33);
+	ROUND3(D, A, B, C, 8, Sj, 34);
+	ROUND3(C, D, A, B, 11, Sk, 35);
+	ROUND3(B, C, D, A, 14, Sl, 36);
+	ROUND3(A, B, C, D, 1, Si, 37);
+	ROUND3(D, A, B, C, 4, Sj, 38);
+	ROUND3(C, D, A, B, 7, Sk, 39);
+	ROUND3(B, C, D, A, 10, Sl, 40);
+	ROUND3(A, B, C, D, 13, Si, 41);
+	ROUND3(D, A, B, C, 0, Sj, 42);
+	ROUND3(C, D, A, B, 3, Sk, 43);
+	ROUND3(B, C, D, A, 6, Sl, 44);
+	ROUND3(A, B, C, D, 9, Si, 45);
+	ROUND3(D, A, B, C, 12, Sj, 46);
+	ROUND3(C, D, A, B, 15, Sk, 47);
+	ROUND3(B, C, D, A, 2, Sl, 48);
+
+	/* round 4 */
+	ROUND4(A, B, C, D, 0, Sm, 49);
+	ROUND4(D, A, B, C, 7, Sn, 50);
+	ROUND4(C, D, A, B, 14, So, 51);
+	ROUND4(B, C, D, A, 5, Sp, 52);
+	ROUND4(A, B, C, D, 12, Sm, 53);
+	ROUND4(D, A, B, C, 3, Sn, 54);
+	ROUND4(C, D, A, B, 10, So, 55);
+	ROUND4(B, C, D, A, 1, Sp, 56);
+	ROUND4(A, B, C, D, 8, Sm, 57);
+	ROUND4(D, A, B, C, 15, Sn, 58);
+	ROUND4(C, D, A, B, 6, So, 59);
+	ROUND4(B, C, D, A, 13, Sp, 60);
+	ROUND4(A, B, C, D, 4, Sm, 61);
+	ROUND4(D, A, B, C, 11, Sn, 62);
+	ROUND4(C, D, A, B, 2, So, 63);
+	ROUND4(B, C, D, A, 9, Sp, 64);
+
+	/* fold this block into the running state */
+	ctxt->md5_sta += A;
+	ctxt->md5_stb += B;
+	ctxt->md5_stc += C;
+	ctxt->md5_std += D;
+}
+
+/* vi: set ts=4: */
--- /dev/null
+/* $PostgreSQL: pgsql/contrib/pgcrypto/md5.h,v 1.9 2005/10/15 02:49:06 momjian Exp $ */
+/* $KAME: md5.h,v 1.3 2000/02/22 14:01:18 itojun Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETINET6_MD5_H_
+#define _NETINET6_MD5_H_
+
+#define MD5_BUFLEN 64
+#define MD5_DIGEST_LENGTH 16
+
+/* state of one MD5 computation */
+typedef struct
+{
+ /* the four 32-bit state words, also addressable as 16 bytes */
+ union
+ {
+ uint32 md5_state32[4];
+ uint8 md5_state8[16];
+ } md5_st;
+
+#define md5_sta md5_st.md5_state32[0]
+#define md5_stb md5_st.md5_state32[1]
+#define md5_stc md5_st.md5_state32[2]
+#define md5_std md5_st.md5_state32[3]
+#define md5_st8 md5_st.md5_state8
+
+ /* message length in bits, also addressable as 8 bytes */
+ union
+ {
+ uint64 md5_count64;
+ uint8 md5_count8[8];
+ } md5_count;
+#define md5_n md5_count.md5_count64
+#define md5_n8 md5_count.md5_count8
+
+ unsigned int md5_i; /* number of bytes currently held in md5_buf */
+ uint8 md5_buf[MD5_BUFLEN]; /* input not yet processed as a full block */
+} md5_ctxt;
+
+extern void md5_init(md5_ctxt *); /* backs MD5_Init() */
+extern void md5_loop(md5_ctxt *, const uint8 *, unsigned int); /* backs MD5_Update(): context, data, length */
+extern void md5_pad(md5_ctxt *); /* first step of MD5_Final() */
+extern void md5_result(uint8 *, md5_ctxt *); /* second step of MD5_Final(); NOTE(review): output presumably MD5_DIGEST_LENGTH bytes - confirm in md5.c */
+
+/*
+ * Compatibility with OpenSSL: lets callers written against the
+ * MD5_CTX / MD5_Init / MD5_Update / MD5_Final names use this
+ * implementation instead.
+ *
+ * MD5_Update casts its data argument to void *, so any pointer type
+ * is accepted.  MD5_Final takes (digest, ctx) and expands to
+ * md5_pad() followed by md5_result(); the do { } while (0) wrapper
+ * makes the two calls behave as a single statement.
+ */
+#define MD5_CTX md5_ctxt
+#define MD5_Init(x) md5_init((x))
+#define MD5_Update(x, y, z) md5_loop((x), (void*)(y), (z))
+#define MD5_Final(x, y) \
+do { \
+ md5_pad((y)); \
+ md5_result((x), (y)); \
+} while (0)
+
+#endif /* ! _NETINET6_MD5_H_ */
+
+/* vi: set ts=4: */
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Herding objects between lists happens here.
+ */
+
+#include "bouncer.h"
+
+/* global registries: entries are created by add_user(), add_database() and new_pool(), kept sorted by put_in_order(), and never freed in this file */
+STATLIST(user_list);
+STATLIST(database_list);
+STATLIST(pool_list);
+
+/*
+ * Lists for client and server PgSocket objects that are not owned
+ * by a pool.  Objects are allocated on demand by new_client() /
+ * new_server() and recycled through the free lists;
+ * login_client_list holds clients in state CL_LOGIN.
+ */
+STATLIST(free_client_list);
+STATLIST(free_server_list);
+STATLIST(login_client_list);
+
+/* number of client PgSockets allocated by new_client(); only ever incremented in this file */
+static int absolute_client_count = 0;
+/* number of server PgSockets allocated by new_server(); only ever incremented in this file */
+static int absolute_server_count = 0;
+
+/* array of user pointers in user_list order (sorted by name), built by create_auth_cache() for bsearch() in find_user(); NULL while no valid cache exists */
+static PgUser **user_lookup = NULL;
+
+/*
+ * Discard the user lookup array.
+ *
+ * Must run whenever user_list may change, because the array mirrors
+ * the list and would otherwise go stale.  find_user() falls back to
+ * a linear scan while user_lookup is NULL.
+ */
+static void reset_auth_cache(void)
+{
+	/* free(NULL) is a no-op, so no guard is required */
+	free(user_lookup);
+	user_lookup = NULL;
+}
+
+/*
+ * Number of client sockets currently in use: everything ever
+ * allocated minus what sits on the free list.  No list walk needed.
+ */
+int get_active_client_count(void)
+{
+	int unused = statlist_count(&free_client_list);
+
+	return absolute_client_count - unused;
+}
+
+/*
+ * Number of server sockets currently in use: everything ever
+ * allocated minus what sits on the free list.  No list walk needed.
+ */
+int get_active_server_count(void)
+{
+	int unused = statlist_count(&free_server_list);
+
+	return absolute_server_count - unused;
+}
+
+/*
+ * Reset the per-connection fields of a recycled PgSocket so nothing
+ * from its previous user leaks into the next one.  List membership,
+ * state and the sbuf are left untouched.
+ */
+static void clean_socket(PgSocket *sk)
+{
+	/* relations to other objects */
+	sk->link = NULL;
+	sk->pool = NULL;
+	sk->auth_user = NULL;
+
+	/* timestamps */
+	sk->connect_time = 0;
+	sk->request_time = 0;
+	sk->query_start = 0;
+
+	/* status flags */
+	sk->wait_for_welcome = 0;
+	sk->wait_for_response = 0;
+	sk->ready = 0;
+	sk->flush_req = 0;
+	sk->suspended = 0;
+	sk->admin_user = 0;
+	sk->own_user = 0;
+}
+
+/*
+ * Get a client PgSocket: recycle the first one on free_client_list,
+ * or allocate and initialize a fresh one if the list is empty.
+ *
+ * A fresh object is put on free_client_list in state CL_FREE, so in
+ * both cases the caller is expected to move it on with
+ * change_client_state().  Returns NULL when allocation fails.
+ */
+static PgSocket *new_client(void)
+{
+	PgSocket *sk = first_socket(&free_client_list);
+
+	if (!sk) {
+		/* nothing to recycle: grow by one object (struct plus sbuf space) */
+		sk = zmalloc(sizeof(*sk) + cf_sbuf_len);
+		if (!sk)
+			return NULL;
+
+		list_init(&sk->head);
+		sbuf_init(&sk->sbuf, client_proto, sk);
+		statlist_prepend(&sk->head, &free_client_list);
+		sk->state = CL_FREE;
+
+		absolute_client_count++;
+		return sk;
+	}
+
+	/* recycled object: wipe leftovers from the previous connection */
+	clean_socket(sk);
+	return sk;
+}
+
+/*
+ * Get a server PgSocket: recycle the first one on free_server_list,
+ * or allocate and initialize a fresh one if the list is empty.
+ *
+ * A fresh object is put on free_server_list in state SV_FREE, so in
+ * both cases the caller is expected to move it on with
+ * change_server_state().  Returns NULL when allocation fails.
+ */
+static PgSocket *new_server(void)
+{
+	PgSocket *sk = first_socket(&free_server_list);
+
+	if (!sk) {
+		/* nothing to recycle: grow by one object (struct plus sbuf space) */
+		sk = zmalloc(sizeof(*sk) + cf_sbuf_len);
+		if (!sk)
+			return NULL;
+
+		list_init(&sk->head);
+		sbuf_init(&sk->sbuf, server_proto, sk);
+		statlist_prepend(&sk->head, &free_server_list);
+		sk->state = SV_FREE;
+
+		absolute_server_count++;
+		return sk;
+	}
+
+	/* recycled object: wipe leftovers from the previous connection */
+	clean_socket(sk);
+	return sk;
+}
+
+/* map a client state to the list that holds such clients; NULL for a non-client state */
+static StatList *client_state_list(PgPool *pool, SocketState state)
+{
+	switch (state) {
+	case CL_FREE:
+		return &free_client_list;
+	case CL_LOGIN:
+		return &login_client_list;
+	case CL_WAITING:
+		return &pool->waiting_client_list;
+	case CL_ACTIVE:
+		return &pool->active_client_list;
+	case CL_CANCEL:
+		return &pool->cancel_req_list;
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Move a client to the list that belongs to newstate.
+ *
+ * The client is unlinked from the list of its current state, its
+ * state field is updated, and it is linked into the list of the new
+ * state.  CL_FREE and CL_LOGIN use global lists; the other states
+ * use lists inside client->pool, which therefore must be set before
+ * entering or leaving them.  Unknown states end in fatal().
+ */
+void change_client_state(PgSocket *client, SocketState newstate)
+{
+	PgPool *pool = client->pool;
+	StatList *from, *to;
+
+	/* leave the old list */
+	from = client_state_list(pool, client->state);
+	if (from)
+		statlist_remove(&client->head, from);
+	else
+		fatal("bad cur client state: %d", client->state);
+
+	client->state = newstate;
+
+	/* enter the new list */
+	to = client_state_list(pool, client->state);
+	if (!to)
+		fatal("bad new client state: %d", client->state);
+	else if (client->state == CL_FREE)
+		/* free list is LIFO to keep the cache warm */
+		statlist_prepend(&client->head, to);
+	else
+		statlist_append(&client->head, to);
+}
+
+/*
+ * Move a server socket to the list that belongs to newstate.
+ *
+ * The socket is first unlinked from the list of its current state,
+ * server->state is updated, and it is then inserted into the list
+ * of the new state.  SV_FREE uses the global free_server_list; all
+ * other states use lists inside server->pool, so server->pool must
+ * be set before entering or leaving such a state.  An unknown state
+ * on either side ends in fatal().
+ */
+void change_server_state(PgSocket *server, SocketState newstate)
+{
+ PgPool *pool = server->pool;
+
+ /* remove from old location */
+ switch (server->state) {
+ case SV_FREE:
+ statlist_remove(&server->head, &free_server_list);
+ break;
+ case SV_LOGIN:
+ statlist_remove(&server->head, &pool->new_server_list);
+ break;
+ case SV_USED:
+ statlist_remove(&server->head, &pool->used_server_list);
+ break;
+ case SV_TESTED:
+ statlist_remove(&server->head, &pool->tested_server_list);
+ break;
+ case SV_IDLE:
+ statlist_remove(&server->head, &pool->idle_server_list);
+ break;
+ case SV_ACTIVE:
+ statlist_remove(&server->head, &pool->active_server_list);
+ break;
+ default:
+ fatal("change_server_state: bad old server state: %d", server->state);
+ }
+
+ server->state = newstate;
+
+ /* put to new location; prepend = LIFO, append = FIFO */
+ switch (server->state) {
+ case SV_FREE:
+ /* use LIFO to keep cache warm */
+ statlist_prepend(&server->head, &free_server_list);
+ break;
+ case SV_LOGIN:
+ statlist_append(&server->head, &pool->new_server_list);
+ break;
+ case SV_USED:
+ /* again, LIFO */
+ statlist_prepend(&server->head, &pool->used_server_list);
+ break;
+ case SV_TESTED:
+ statlist_append(&server->head, &pool->tested_server_list);
+ break;
+ case SV_IDLE:
+ if (server->close_needed)
+ /* queue at the tail to avoid immediate reuse of a connection marked for closing */
+ statlist_append(&server->head, &pool->idle_server_list);
+ else
+ /* otherwise use LIFO */
+ statlist_prepend(&server->head, &pool->idle_server_list);
+ break;
+ case SV_ACTIVE:
+ statlist_append(&server->head, &pool->active_server_list);
+ break;
+ default:
+ fatal("bad server state");
+ }
+}
+
+/*
+ * Ordering callback for put_in_order(): pools sort by database name
+ * first and by user name second.  Identical db/user pointers count
+ * as equal without comparing strings.
+ */
+static int cmp_pool(List *i1, List *i2)
+{
+	PgPool *left = container_of(i1, PgPool, head);
+	PgPool *right = container_of(i2, PgPool, head);
+
+	if (left->db == right->db) {
+		if (left->user == right->user)
+			return 0;
+		return strcmp(left->user->name, right->user->name);
+	}
+	return strcmp(left->db->name, right->db->name);
+}
+
+/* ordering callback for put_in_order(): users sort by name */
+static int cmp_user(List *i1, List *i2)
+{
+	const char *left = container_of(i1, PgUser, head)->name;
+	const char *right = container_of(i2, PgUser, head)->name;
+
+	return strcmp(left, right);
+}
+
+/* ordering callback for put_in_order(): databases sort by name */
+static int cmp_database(List *i1, List *i2)
+{
+	const char *left = container_of(i1, PgDatabase, head)->name;
+	const char *right = container_of(i2, PgDatabase, head)->name;
+
+	return strcmp(left, right);
+}
+
+/*
+ * Insert newitem into an already sorted list, keeping it sorted.
+ *
+ * cmpfn(existing, newitem) follows strcmp() conventions.  The item
+ * goes in front of the first element that compares greater, or at
+ * the tail if there is none.  Duplicates are a programming error
+ * and end in fatal().
+ */
+static void put_in_order(List *newitem, StatList *list, int (*cmpfn)(List *, List *))
+{
+	List *cur;
+
+	statlist_for_each(cur, list) {
+		int order = cmpfn(cur, newitem);
+
+		if (order > 0) {
+			statlist_put_before(newitem, list, cur);
+			return;
+		}
+		if (order == 0)
+			fatal("put_in_order: found existing elem");
+	}
+
+	/* nothing sorts after it */
+	statlist_append(newitem, list);
+}
+
+/*
+ * Return the database object called name, creating and registering
+ * it in database_list when it does not exist yet.
+ *
+ * The name is copied with strlcpy() into the fixed-size name field.
+ * Returns NULL only if allocation fails.
+ */
+PgDatabase *add_database(const char *name)
+{
+	PgDatabase *db = find_database(name);
+
+	/* already registered */
+	if (db != NULL)
+		return db;
+
+	db = zmalloc(sizeof(*db));
+	if (db == NULL)
+		return NULL;
+
+	list_init(&db->head);
+	strlcpy(db->name, name, sizeof(db->name));
+	put_in_order(&db->head, &database_list, cmp_database);
+	return db;
+}
+
+/*
+ * Register a client user, or update the password of an existing one.
+ *
+ * The lookup cache is dropped on every call, including pure
+ * password updates.  Name and password are copied with strlcpy()
+ * into fixed-size fields.  Returns NULL only if allocation fails.
+ */
+PgUser *add_user(const char *name, const char *passwd)
+{
+	PgUser *entry;
+
+	/* look up first: find_user() may still use the cache dropped below */
+	entry = find_user(name);
+	reset_auth_cache();
+
+	if (entry == NULL) {
+		entry = zmalloc(sizeof(*entry));
+		if (entry == NULL)
+			return NULL;
+
+		list_init(&entry->head);
+		list_init(&entry->pool_list);
+		strlcpy(entry->name, name, sizeof(entry->name));
+		put_in_order(&entry->head, &user_list, cmp_user);
+	}
+
+	strlcpy(entry->passwd, passwd, sizeof(entry->passwd));
+	return entry;
+}
+
+/*
+ * Set or update the forced server-side user of a database.
+ *
+ * The object lives only in db->forced_user; it is not inserted into
+ * user_list.  An existing forced user is reused and overwritten.
+ * Returns NULL only if allocation fails.
+ */
+PgUser *force_user(PgDatabase *db, const char *name, const char *passwd)
+{
+	PgUser *forced = db->forced_user;
+
+	if (forced == NULL) {
+		forced = zmalloc(sizeof(*forced));
+		if (forced == NULL)
+			return NULL;
+		list_init(&forced->head);
+		list_init(&forced->pool_list);
+	}
+
+	strlcpy(forced->name, name, sizeof(forced->name));
+	strlcpy(forced->passwd, passwd, sizeof(forced->passwd));
+
+	db->forced_user = forced;
+	return forced;
+}
+
+/* look up a registered database by exact name; NULL if there is none */
+PgDatabase *find_database(const char *name)
+{
+	List *cur;
+
+	statlist_for_each(cur, &database_list) {
+		PgDatabase *cand = container_of(cur, PgDatabase, head);
+
+		if (!strcmp(cand->name, name))
+			return cand;
+	}
+	return NULL;
+}
+
+/*
+ * bsearch() callback: the key is a plain C string, the array
+ * element is a pointer to PgUser.
+ */
+static int user_name_cmp(const void *namestr, const void *userptr)
+{
+	const char *wanted = namestr;
+	const PgUser *elem = *(const PgUser * const *)userptr;
+
+	return strcmp(wanted, elem->name);
+}
+
+/*
+ * Look up a registered user by exact name; NULL if there is none.
+ *
+ * Uses binary search over user_lookup when the cache exists,
+ * otherwise walks user_list.
+ */
+PgUser *find_user(const char *name)
+{
+	PgUser **hit;
+	List *cur;
+
+	if (!user_lookup) {
+		/* no cache: linear scan */
+		statlist_for_each(cur, &user_list) {
+			PgUser *cand = container_of(cur, PgUser, head);
+
+			if (!strcmp(cand->name, name))
+				return cand;
+		}
+		return NULL;
+	}
+
+	/* cache available: the array has one slot per user_list entry */
+	hit = bsearch(name, user_lookup,
+		      statlist_count(&user_list),
+		      sizeof(PgUser *),
+		      user_name_cmp);
+	if (hit == NULL)
+		return NULL;
+	return *hit;
+}
+
+/*
+ * (Re)build the user lookup array from user_list.
+ *
+ * user_list is kept sorted by name, so copying it in order yields
+ * an array suitable for bsearch().  On allocation failure the cache
+ * simply stays absent and find_user() keeps using the slow path.
+ */
+void create_auth_cache(void)
+{
+	List *cur;
+	PgUser **slot;
+
+	reset_auth_cache();
+
+	user_lookup = malloc(sizeof(PgUser *) * statlist_count(&user_list));
+	if (user_lookup == NULL)
+		return;
+
+	slot = user_lookup;
+	statlist_for_each(cur, &user_list) {
+		*slot++ = container_of(cur, PgUser, head);
+	}
+}
+
+/*
+ * Allocate and register the pool for a db/user pair.
+ *
+ * The pool is linked into user->pool_list (for get_pool() lookups)
+ * and into the global pool_list, which is kept in db/user order.
+ * Returns NULL if allocation fails.
+ */
+static PgPool *new_pool(PgDatabase *db, PgUser *user)
+{
+	PgPool *pool = zmalloc(sizeof(*pool));
+
+	if (pool == NULL)
+		return NULL;
+
+	pool->db = db;
+	pool->user = user;
+
+	list_init(&pool->head);
+	list_init(&pool->map_head);
+
+	/* client-side queues */
+	statlist_init(&pool->active_client_list, "active_client_list");
+	statlist_init(&pool->waiting_client_list, "waiting_client_list");
+	statlist_init(&pool->cancel_req_list, "cancel_req_list");
+
+	/* server-side queues */
+	statlist_init(&pool->active_server_list, "active_server_list");
+	statlist_init(&pool->idle_server_list, "idle_server_list");
+	statlist_init(&pool->tested_server_list, "tested_server_list");
+	statlist_init(&pool->used_server_list, "used_server_list");
+	statlist_init(&pool->new_server_list, "new_server_list");
+
+	list_append(&pool->map_head, &user->pool_list);
+
+	/* sorted global list makes stats faster */
+	put_in_order(&pool->head, &pool_list, cmp_pool);
+
+	return pool;
+}
+
+/*
+ * Return the pool for a db/user pair, creating it on first use.
+ *
+ * Returns NULL when either argument is NULL or when a new pool
+ * cannot be allocated.
+ */
+PgPool *get_pool(PgDatabase *db, PgUser *user)
+{
+	List *cur;
+
+	if (db == NULL || user == NULL)
+		return NULL;
+
+	/* each user carries the list of its pools; match on database */
+	list_for_each(cur, &user->pool_list) {
+		PgPool *cand = container_of(cur, PgPool, map_head);
+
+		if (cand->db == db)
+			return cand;
+	}
+
+	return new_pool(db, user);
+}
+
+/*
+ * Park an active client: move it from its pool's active list to the
+ * waiting list (CL_ACTIVE -> CL_WAITING) and then call sbuf_pause()
+ * on its buffer.  Asserts that the client is currently CL_ACTIVE.
+ */
+void pause_client(PgSocket *client)
+{
+ Assert(client->state == CL_ACTIVE);
+
+ slog_debug(client, "pause_client");
+ change_client_state(client, CL_WAITING);
+ sbuf_pause(&client->sbuf);
+}
+
+/*
+ * Counterpart of pause_client(): move a waiting client back to its
+ * pool's active list (CL_WAITING -> CL_ACTIVE) and then call
+ * sbuf_continue() on its buffer.  Asserts that the client is
+ * currently CL_WAITING.
+ */
+void activate_client(PgSocket *client)
+{
+ Assert(client->state == CL_WAITING);
+
+ slog_debug(client, "activate_client");
+ change_client_state(client, CL_ACTIVE);
+ sbuf_continue(&client->sbuf);
+}
+
+/*
+ * Make sure an active client has a server to talk to.
+ *
+ * Returns true if the client is linked on return (it already was,
+ * or an idle server was just assigned).  Returns false if no idle
+ * server could be used; the client has then been paused and moved
+ * to the waiting list.  With cf_pause_mode == 1 idle servers are
+ * never handed out.
+ */
+bool find_server(PgSocket *client)
+{
+	PgPool *pool = client->pool;
+	PgSocket *server = NULL;
+
+	Assert(client->state == CL_ACTIVE);
+
+	/* already paired */
+	if (client->link)
+		return true;
+
+	if (cf_pause_mode != 1)
+		server = first_socket(&pool->idle_server_list);
+
+	if (!server) {
+		/* nothing available: queue the client */
+		pause_client(client);
+		Assert(client->state == CL_WAITING);
+		return false;
+	}
+
+	/* pair them up */
+	Assert(server->state == SV_IDLE);
+	client->link = server;
+	server->link = client;
+	change_server_state(server, SV_ACTIVE);
+	return true;
+}
+
+/*
+ * Hand a server back to its pool after use or after login.
+ *
+ * SV_ACTIVE: the client/server link is cut on both sides; the
+ *   server goes to SV_USED when cf_server_check_delay is 0 and
+ *   cf_server_check_query is non-empty, otherwise to SV_IDLE.
+ * SV_USED, SV_TESTED: the server goes to SV_IDLE.
+ * SV_LOGIN: pool->last_connect_failed is cleared and the server
+ *   goes to SV_IDLE.
+ * Any other state ends in fatal().
+ *
+ * The server must be marked ready (asserted).
+ */
+void release_server(PgSocket *server)
+{
+ PgPool *pool = server->pool;
+ SocketState newstate = SV_IDLE;
+
+ /* btw, this function is not allowed to disconnect,
+ as there may be packet pending */
+ Assert(server->ready);
+
+ /* per-state bookkeeping; the actual list move is done by change_server_state() below */
+ switch (server->state) {
+ case SV_ACTIVE:
+ server->link->link = NULL;
+ server->link = NULL;
+
+ if (cf_server_check_delay == 0 && *cf_server_check_query)
+ newstate = SV_USED; /* fall through */
+ case SV_USED:
+ case SV_TESTED:
+ break;
+ case SV_LOGIN:
+ pool->last_connect_failed = 0;
+ break;
+ default:
+ fatal("bad server state in release_server");
+ }
+
+ Assert(server->link == NULL);
+
+ log_debug("release_server: new state=%d", newstate);
+
+ change_server_state(server, newstate);
+}
+
+/*
+ * Drop a server connection and recycle its PgSocket.
+ *
+ * server - socket to close; ends up in state SV_FREE
+ * notify - when true, the 5-byte 'X' packet (pkt_term) is sent
+ *          before closing, unless suppressed in the SV_LOGIN case
+ * reason - text for the log; also passed on to a linked client
+ *
+ * An SV_ACTIVE server that is still linked takes its client down
+ * too, via disconnect_client(client, true, reason).  Unknown states
+ * end in fatal().
+ */
+void disconnect_server(PgSocket *server, bool notify, const char *reason)
+{
+ PgPool *pool = server->pool;
+ PgSocket *client = server->link;
+ static const uint8 pkt_term[] = {'X', 0,0,0,4};
+ int send_term = 1;
+
+ log_debug("disconnect_server");
+ slog_info(server, "closing because: %s", reason);
+
+ switch (server->state) {
+ case SV_ACTIVE:
+ client = server->link;
+ if (client) {
+ client->link = NULL; /* cut the link first so disconnect_client() does not come back to this server */
+ server->link = NULL;
+ disconnect_client(client, true, reason);
+ }
+ break;
+ case SV_TESTED:
+ case SV_USED:
+ case SV_IDLE:
+ break;
+ case SV_LOGIN:
+ /*
+ * usually disconnect means problems in startup phase,
+ * except when sending cancel packet
+ */
+ if (!server->ready)
+ pool->last_connect_failed = 1;
+ else
+ send_term = 0;
+ break;
+ default:
+ fatal("disconnect_server: bad server state");
+ }
+
+ Assert(server->link == NULL);
+
+ /* notify server and close connection */
+ if (send_term && notify)
+ sbuf_answer(&server->sbuf, pkt_term, sizeof(pkt_term));
+ sbuf_close(&server->sbuf);
+
+ change_server_state(server, SV_FREE);
+}
+
+/*
+ * Drop a client connection and recycle its PgSocket.
+ *
+ * client - socket to close; ends up in state CL_FREE
+ * notify - when true and reason is non-NULL, reason is sent to the
+ *          client with send_pooler_error() before closing
+ * reason - text for the debug log and the optional error message
+ *
+ * A server linked to a CL_ACTIVE client is handed back with
+ * release_server() if it is ready; otherwise it is unlinked and
+ * closed through disconnect_server() with reason "unclean server".
+ * Unknown states end in fatal().
+ */
+void disconnect_client(PgSocket *client, bool notify, const char *reason)
+{
+ slog_debug(client, "closing because: %s", reason);
+
+ switch (client->state) {
+ case CL_ACTIVE:
+ if (client->link) {
+ PgSocket *server = client->link;
+ if (server->ready) {
+ release_server(server);
+ } else {
+ server->link = NULL; /* cut the link first so disconnect_server() does not come back to this client */
+ client->link = NULL;
+ disconnect_server(server, true, "unclean server");
+ }
+ } /* fall through */
+ case CL_LOGIN:
+ case CL_WAITING:
+ case CL_CANCEL:
+ break;
+ default:
+ fatal("bad client state in disconnect_client: %d", client->state);
+ }
+
+ /* send reason to client */
+ if (notify && reason) {
+ /*
+ * dont send Ready pkt here, or client wont notice
+ * closed connection
+ */
+ send_pooler_error(client, false, reason);
+ }
+
+ sbuf_close(&client->sbuf);
+
+ change_client_state(client, CL_FREE);
+}
+
+/*
+ * Start one new server connection for the pool, if that is allowed.
+ *
+ * Nothing happens when
+ *  - a connection attempt is already in progress for this pool,
+ *  - the last attempt failed less than cf_server_login_retry ago,
+ *  - the pool is full and the database welcome message is ready,
+ *  - no PgSocket can be obtained.
+ * Otherwise a server socket is set up in state SV_LOGIN and the
+ * connect is started.
+ */
+void launch_new_connection(PgPool *pool)
+{
+	PgSocket *server;
+	int total;
+
+	/* one attempt per pool at a time */
+	if (!statlist_empty(&pool->new_server_list)) {
+		log_debug("launch_new_connection: already progress");
+		return;
+	}
+
+	/* back off after a failed attempt */
+	if (pool->last_connect_failed) {
+		usec_t now = get_cached_time();
+		usec_t since_last = now - pool->last_connect_time;
+
+		if (since_last < cf_server_login_retry) {
+			log_debug("launch_new_connection: last failed, wait");
+			return;
+		}
+	}
+
+	/* respect the pool size limit */
+	total = pool_server_count(pool);
+	if (total >= pool->db->pool_size && pool->db->welcome_msg_ready) {
+		log_debug("launch_new_connection: pool full (%d >= %d)",
+			  total, pool->db->pool_size);
+		return;
+	}
+
+	server = new_server();
+	if (server == NULL) {
+		log_debug("launch_new_connection: no mem");
+		return;
+	}
+
+	/* fill in the socket and move it to the login list */
+	server->pool = pool;
+	server->auth_user = pool->user;
+	server->addr = pool->db->addr;
+	server->connect_time = get_cached_time();
+	pool->last_connect_time = get_cached_time();
+	change_server_state(server, SV_LOGIN);
+
+	/* kick off the connect */
+	slog_info(server, "new connection to server");
+	sbuf_connect(&server->sbuf, &server->addr, cf_server_connect_timeout / USEC);
+}
+
+/*
+ * Set up a PgSocket for a freshly accepted client connection.
+ *
+ * sock    - connected socket fd
+ * addr    - peer address, or NULL (the address is then zeroed)
+ * is_unix - whether the connection came in over a unix socket
+ *
+ * The client is put into state CL_LOGIN and its sbuf is attached to
+ * the socket.  Returns NULL if no PgSocket could be obtained.
+ */
+PgSocket * accept_client(int sock,
+			 const struct sockaddr_in *addr,
+			 bool is_unix)
+{
+	PgSocket *client = new_client();
+
+	if (client == NULL)
+		return NULL;
+
+	client->request_time = get_cached_time();
+	client->connect_time = client->request_time;
+	client->query_start = 0;
+
+	if (!addr) {
+		memset(&client->addr, 0, sizeof(client->addr));
+	} else {
+		client->addr.ip_addr = addr->sin_addr;
+		client->addr.port = ntohs(addr->sin_port);
+	}
+	client->addr.is_unix = is_unix;
+
+	change_client_state(client, CL_LOGIN);
+
+	slog_debug(client, "got connection attempt");
+	sbuf_accept(&client->sbuf, sock, is_unix);
+
+	return client;
+}
+
+/*
+ * The client has authenticated: make it active and send the cached
+ * welcome message so that it can start issuing queries.
+ *
+ * Returns true when the welcome message was sent.  Returns false
+ * when welcome_client() could not send it; the client is then
+ * flagged wait_for_welcome, paused, and (unless cf_pause_mode is
+ * set) a new server connection is launched for its pool.
+ */
+bool finish_client_login(PgSocket *client)
+{
+	if (client->state == CL_LOGIN)
+		change_client_state(client, CL_ACTIVE);
+	else if (client->state != CL_ACTIVE)
+		fatal("bad client state");
+
+	if (welcome_client(client)) {
+		client->wait_for_welcome = 0;
+		slog_debug(client, "logged in");
+		return true;
+	}
+
+	log_debug("finish_client_login: no welcome msg, pause");
+	client->wait_for_welcome = 1;
+	pause_client(client);
+	if (!cf_pause_mode)
+		launch_new_connection(client->pool);
+	return false;
+}
+
+/*
+ * Handle a cancel request.  req->cancel_key holds the key sent by
+ * the requester; req itself is a client socket in state CL_LOGIN.
+ *
+ * The active clients of all pools are searched for that key:
+ *  - no match: req is dropped;
+ *  - match without a linked server: both the matched client and
+ *    req are dropped;
+ *  - match with a linked server: req's socket is closed, the
+ *    server's cancel key is stored in req, req is queued on the
+ *    cancel_req_list of the matched client's pool (state
+ *    CL_CANCEL), and a new server connection is launched so that
+ *    forward_cancel_request() can deliver it.
+ */
+void accept_cancel_request(PgSocket *req)
+{
+	List *pitem, *citem;
+	PgPool *pool = NULL;
+	PgSocket *server = NULL, *client, *main_client = NULL;
+
+	Assert(req->state == CL_LOGIN);
+
+	/* find real client this is for */
+	statlist_for_each(pitem, &pool_list) {
+		pool = container_of(pitem, PgPool, head);
+		statlist_for_each(citem, &pool->active_client_list) {
+			client = container_of(citem, PgSocket, head);
+			if (memcmp(client->cancel_key, req->cancel_key, 8) == 0) {
+				main_client = client;
+				break;
+			}
+		}
+		/*
+		 * Stop at the first match.  Continuing the outer loop
+		 * would leave 'pool' pointing at some later pool instead
+		 * of the one that owns main_client.
+		 */
+		if (main_client)
+			break;
+	}
+
+	/* wrong key */
+	if (!main_client) {
+		disconnect_client(req, false, "failed cancel req");
+		return;
+	}
+
+	/* not linked client, just drop it then */
+	if (!main_client->link) {
+		disconnect_client(main_client, true, "canceling idle client");
+		disconnect_client(req, false, "cancel req for idle client");
+		return;
+	}
+
+	/* drop the connection silently */
+	sbuf_close(&req->sbuf);
+
+	/* remember server key */
+	server = main_client->link;
+	memcpy(req->cancel_key, server->cancel_key, 8);
+
+	/*
+	 * Attach the request to the target pool.  change_client_state()
+	 * finds cancel_req_list through req->pool, both here and later
+	 * when forward_cancel_request() moves req to CL_FREE, so the
+	 * pool pointer must be set.
+	 */
+	req->pool = pool;
+	change_client_state(req, CL_CANCEL);
+
+	launch_new_connection(pool);
+}
+
+/*
+ * Deliver the oldest queued cancel request of the server's pool
+ * over the given server connection (which must be in SV_LOGIN),
+ * then move the request object to CL_FREE.
+ */
+void forward_cancel_request(PgSocket *server)
+{
+	PgSocket *req;
+	bool res;
+
+	req = first_socket(&server->pool->cancel_req_list);
+
+	Assert(req != NULL && req->state == CL_CANCEL);
+	Assert(server->state == SV_LOGIN);
+
+	/* res receives the send status; it is not acted upon here */
+	SEND_CancelRequest(res, server, req->cancel_key);
+
+	change_client_state(req, CL_FREE);
+}
+
+/*
+ * Adopt an already connected client socket.
+ *
+ * fd       - the socket
+ * addr     - peer address to record
+ * dbname   - database the client is connected to
+ * username - user the client is logged in as
+ * ckey     - cancel key to store for the client
+ * oldfd    - stored in tmp_sk_oldfd
+ * linkfd   - stored in tmp_sk_linkfd
+ *
+ * The client is registered as suspended and CL_ACTIVE.  Returns
+ * false if the db/user pool cannot be resolved, no PgSocket can be
+ * obtained, or set_pool() fails.
+ */
+bool use_client_socket(int fd, PgAddr *addr,
+		       const char *dbname, const char *username,
+		       uint64 ckey, int oldfd, int linkfd)
+{
+	PgDatabase *db = find_database(dbname);
+	PgUser *user = find_user(username);
+	PgPool *pool = get_pool(db, user);
+	PgSocket *client;
+	PktBuf tmp;
+
+	if (!pool)
+		return false;
+
+	client = accept_client(fd, NULL, addr->is_unix);
+	/* accept_client() returns NULL when no PgSocket is available */
+	if (!client)
+		return false;
+	client->addr = *addr;
+	client->suspended = 1;
+
+	if (!set_pool(client, dbname, username))
+		return false;
+
+	change_client_state(client, CL_ACTIVE);
+
+	/* store old cancel key */
+	pktbuf_static(&tmp, client->cancel_key, 8);
+	pktbuf_put_uint64(&tmp, ckey);
+
+	/* store old fds */
+	client->tmp_sk_oldfd = oldfd;
+	client->tmp_sk_linkfd = linkfd;
+
+	return true;
+}
+
+/*
+ * Adopt an already connected server socket.
+ *
+ * fd       - the socket
+ * addr     - server address to record
+ * dbname   - database the connection belongs to
+ * username - user of the connection; ignored when the database has
+ *            a forced user
+ * ckey     - cancel key to store for the server
+ * oldfd    - stored in tmp_sk_oldfd
+ * linkfd   - stored in tmp_sk_linkfd; non-zero makes the server
+ *            SV_ACTIVE, zero makes it SV_IDLE
+ *
+ * The server is registered as suspended.  Returns false if the
+ * database is unknown, the pool cannot be resolved, or no PgSocket
+ * can be obtained.
+ */
+bool use_server_socket(int fd, PgAddr *addr,
+		       const char *dbname, const char *username,
+		       uint64 ckey, int oldfd, int linkfd)
+{
+	PgDatabase *db = find_database(dbname);
+	PgUser *user;
+	PgPool *pool;
+	PgSocket *server;
+	PktBuf tmp;
+
+	/* find_database() returns NULL for an unknown name */
+	if (!db)
+		return false;
+
+	if (db->forced_user)
+		user = db->forced_user;
+	else
+		user = find_user(username);
+
+	pool = get_pool(db, user);
+	if (!pool)
+		return false;
+
+	server = new_server();
+	if (!server)
+		return false;
+
+	sbuf_accept(&server->sbuf, fd, addr->is_unix);
+	server->suspended = 1;
+	server->pool = pool;
+	server->auth_user = user;
+	server->addr = *addr;
+	server->connect_time = server->request_time = get_cached_time();
+	server->query_start = 0;
+
+	if (linkfd)
+		change_server_state(server, SV_ACTIVE);
+	else
+		change_server_state(server, SV_IDLE);
+
+	/* store old cancel key */
+	pktbuf_static(&tmp, server->cancel_key, 8);
+	pktbuf_put_uint64(&tmp, ckey);
+
+	/* store old fds */
+	server->tmp_sk_oldfd = oldfd;
+	server->tmp_sk_linkfd = linkfd;
+
+	return true;
+}
+
+/*
+ * Call func on every server socket of the pool.  The lists are
+ * visited in the order idle, used, tested, active, new.
+ */
+void for_each_server(PgPool *pool, void (*func)(PgSocket *sk))
+{
+	StatList *lists[5];
+	List *item;
+	int i;
+
+	lists[0] = &pool->idle_server_list;
+	lists[1] = &pool->used_server_list;
+	lists[2] = &pool->tested_server_list;
+	lists[3] = &pool->active_server_list;
+	lists[4] = &pool->new_server_list;
+
+	for (i = 0; i < 5; i++) {
+		statlist_for_each(item, lists[i])
+			func(container_of(item, PgSocket, head));
+	}
+}
+
+/* for_each_server() callback: flag one server connection as needing to be closed */
+static void tag_dirty(PgSocket *sk)
+{
+	sk->close_needed = 1;
+}
+
+/*
+ * Set close_needed on every server connection that belongs to a
+ * pool of the given database.
+ */
+void tag_database_dirty(PgDatabase *db)
+{
+	List *cur;
+
+	statlist_for_each(cur, &pool_list) {
+		PgPool *pool = container_of(cur, PgPool, head);
+
+		if (pool->db != db)
+			continue;
+		for_each_server(pool, tag_dirty);
+	}
+}
+
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* global object lists, defined in objects.c */
+extern StatList user_list;
+extern StatList pool_list;
+extern StatList database_list;
+extern StatList login_client_list;
+extern StatList free_server_list;
+extern StatList free_client_list;
+
+/* lookups; get_pool() creates the pool on first use */
+PgDatabase *find_database(const char *name);
+PgUser *find_user(const char *name);
+PgPool *get_pool(PgDatabase *, PgUser *);
+
+/* pairing clients with servers */
+bool find_server(PgSocket *client);
+void release_server(PgSocket *server);
+bool finish_client_login(PgSocket *client);
+
+/* connection setup and teardown */
+PgSocket * accept_client(int sock, const struct sockaddr_in *addr, bool is_unix);
+void disconnect_server(PgSocket *server, bool notify, const char *reason);
+void disconnect_client(PgSocket *client, bool notify, const char *reason);
+
+/* registration of databases and users */
+PgDatabase * add_database(const char *name);
+PgUser * add_user(const char *name, const char *passwd);
+PgUser * force_user(PgDatabase *db, const char *username, const char *passwd);
+
+/* cancel request handling */
+void accept_cancel_request(PgSocket *req);
+void forward_cancel_request(PgSocket *server);
+
+void launch_new_connection(PgPool *pool);
+
+/* adoption of already connected sockets */
+bool use_client_socket(int fd, PgAddr *addr, const char *dbname, const char *username, uint64 ckey, int oldfd, int linkfd);
+bool use_server_socket(int fd, PgAddr *addr, const char *dbname, const char *username, uint64 ckey, int oldfd, int linkfd);
+
+/* CL_ACTIVE <-> CL_WAITING */
+void pause_client(PgSocket *client);
+void activate_client(PgSocket *client);
+
+/* state changes move a socket between lists */
+void change_client_state(PgSocket *client, SocketState newstate);
+void change_server_state(PgSocket *server, SocketState newstate);
+
+/* sockets allocated minus sockets on the free list */
+int get_active_client_count(void);
+int get_active_server_count(void);
+
+/* bulk operations on server connections */
+void tag_database_dirty(PgDatabase *db);
+void for_each_server(PgPool *pool, void (*func)(PgSocket *sk));
+
+/* build the sorted user array used by find_user() */
+void create_auth_cache(void);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Packet writing and sending.
+ */
+
+#include "bouncer.h"
+
+/*
+ * Release a dynamic packet buffer together with its data area and
+ * event.  Buffers set up by pktbuf_static() are left alone.
+ */
+static void pktbuf_free(PktBuf *buf)
+{
+	if (!buf->fixed_buf) {
+		log_debug("pktbuf_free(%p)", buf);
+		/* free(NULL) is a no-op, members may be unset */
+		free(buf->buf);
+		free(buf->ev);
+		free(buf);
+	}
+}
+
+/*
+ * Allocate a growable packet buffer with start_len bytes of initial
+ * space, plus the event object used for queued sending.  Returns
+ * NULL if any allocation fails.
+ */
+PktBuf *pktbuf_dynamic(int start_len)
+{
+	PktBuf *pkt = zmalloc(sizeof(PktBuf));
+
+	log_debug("pktbuf_dynamic(%d): %p", start_len, pkt);
+	if (pkt == NULL)
+		return NULL;
+
+	pkt->ev = zmalloc(sizeof(*pkt->ev));
+	if (pkt->ev != NULL)
+		pkt->buf = malloc(start_len);
+
+	/* pkt->buf is still NULL from zmalloc() if the event allocation failed */
+	if (pkt->ev == NULL || pkt->buf == NULL) {
+		pktbuf_free(pkt);
+		return NULL;
+	}
+
+	pkt->buf_len = start_len;
+	return pkt;
+}
+
+/*
+ * Initialize a caller-provided PktBuf on top of caller-provided
+ * storage.  Such a buffer cannot grow: writing past len marks it
+ * failed, and pktbuf_free() ignores it.
+ */
+void pktbuf_static(PktBuf *buf, uint8 *data, int len)
+{
+	memset(buf, 0, sizeof(*buf));
+
+	buf->fixed_buf = 1;
+	buf->buf_len = len;
+	buf->buf = data;
+}
+
+/*
+ * Send the unsent part of the buffer on the socket with a single
+ * safe_send() call.
+ *
+ * Returns true only if everything pending was sent in that call.  A
+ * failed buffer is not sent at all; send errors are logged.
+ */
+bool pktbuf_send_immidiate(PktBuf *buf, PgSocket *sk)
+{
+	int sock = sbuf_socket(&sk->sbuf);
+	int pending = buf->write_pos - buf->send_pos;
+	int sent;
+
+	if (buf->failed)
+		return false;
+
+	sent = safe_send(sock, buf->buf + buf->send_pos, pending, 0);
+	if (sent < 0)
+		log_error("pktbuf_send_immidiate: %s", strerror(errno));
+
+	return sent == pending;
+}
+
+/*
+ * Event callback that pushes out a queued dynamic buffer.
+ *
+ * fd    - socket to write to
+ * flags - event flags (only logged)
+ * arg   - the PktBuf being sent
+ *
+ * Sends as much as the socket takes.  If data remains, the write
+ * event is re-armed and the function will be called again; once
+ * everything is out, or on a hard send error, the buffer is freed.
+ */
+static void pktbuf_send_func(int fd, short flags, void *arg)
+{
+	PktBuf *buf = arg;
+	int amount, res;
+
+	log_debug("pktbuf_send_func(%d, %d, %p)", fd, (int)flags, buf);
+
+	if (buf->failed)
+		return;
+
+	amount = buf->write_pos - buf->send_pos;
+	res = safe_send(fd, buf->buf + buf->send_pos, amount, 0);
+	if (res < 0) {
+		/*
+		 * The error code is in errno, not in the return value.
+		 * EAGAIN only means the socket is full right now:
+		 * count it as zero bytes sent and wait for the next
+		 * write event.
+		 */
+		if (errno == EAGAIN) {
+			res = 0;
+		} else {
+			log_error("pktbuf_send_func: %s", strerror(errno));
+			pktbuf_free(buf);
+			return;
+		}
+	}
+	buf->send_pos += res;
+
+	if (buf->send_pos < buf->write_pos) {
+		event_set(buf->ev, fd, EV_WRITE, pktbuf_send_func, buf);
+		event_add(buf->ev, NULL);
+	} else
+		pktbuf_free(buf);
+}
+
+/*
+ * Send a dynamic buffer in the background and free it when done.
+ * Ownership of buf passes to this function.
+ *
+ * If building the packet had failed, the socket gets the pooler
+ * error "result prepare failed" instead and the buffer is freed
+ * right away.
+ */
+void pktbuf_send_queued(PktBuf *buf, PgSocket *sk)
+{
+	int sock = sbuf_socket(&sk->sbuf);
+
+	Assert(!buf->sending);
+	Assert(!buf->fixed_buf);
+
+	if (!buf->failed) {
+		buf->sending = 1;
+		/* first attempt happens right away, the rest via write events */
+		pktbuf_send_func(sock, EV_WRITE, buf);
+		return;
+	}
+
+	send_pooler_error(sk, true, "result prepare failed");
+	pktbuf_free(buf);
+}
+
+/*
+ * Make sure len more bytes can be written at write_pos.
+ *
+ * Dynamic buffers grow by doubling until the data fits.  Fixed
+ * buffers cannot grow, so running out of space marks them failed,
+ * as does a failed realloc().  Callers must check buf->failed
+ * before writing.
+ */
+static void make_room(PktBuf *buf, int len)
+{
+	int newlen = buf->buf_len;
+	int need = buf->write_pos + len;
+	void *ptr;
+
+	if (newlen >= need)
+		return;
+
+	if (buf->failed)
+		return;
+
+	if (buf->fixed_buf) {
+		buf->failed = 1;
+		return;
+	}
+
+	/*
+	 * Doubling zero never grows: a buffer created with
+	 * pktbuf_dynamic(0) would loop forever below.  Start from the
+	 * required size in that case.
+	 */
+	if (newlen <= 0)
+		newlen = need;
+
+	while (newlen < need)
+		newlen = newlen * 2;
+
+	log_debug("make_room(%p, %d): realloc newlen=%d",
+		  buf, len, newlen);
+	ptr = realloc(buf->buf, newlen);
+	if (!ptr) {
+		/* the old block is still valid and is released by pktbuf_free() */
+		buf->failed = 1;
+	} else {
+		buf->buf = ptr;
+		buf->buf_len = newlen;
+	}
+}
+
+/* append one byte; does nothing if the buffer is (or becomes) failed */
+void pktbuf_put_char(PktBuf *buf, char val)
+{
+	make_room(buf, 1);
+	if (!buf->failed)
+		buf->buf[buf->write_pos++] = val;
+}
+
+/*
+ * Append a 16-bit value, most significant byte first.  Does nothing
+ * if the buffer is (or becomes) failed.
+ */
+void pktbuf_put_uint16(PktBuf *buf, uint16 val)
+{
+	/*
+	 * Reserve exactly the two bytes written below.  Asking for
+	 * more makes a fixed buffer fail although the value would fit.
+	 */
+	make_room(buf, 2);
+	if (buf->failed)
+		return;
+
+	buf->buf[buf->write_pos++] = (val >> 8) & 255;
+	buf->buf[buf->write_pos++] = val & 255;
+}
+
+/*
+ * Append a 32-bit value, most significant byte first.  Does nothing
+ * if the buffer is (or becomes) failed.
+ */
+void pktbuf_put_uint32(PktBuf *buf, uint32 val)
+{
+	int shift;
+
+	make_room(buf, 4);
+	if (buf->failed)
+		return;
+
+	for (shift = 24; shift >= 0; shift -= 8)
+		buf->buf[buf->write_pos++] = (val >> shift) & 255;
+}
+
+/* append a 64-bit value as two 32-bit halves, upper half first */
+void pktbuf_put_uint64(PktBuf *buf, uint64 val)
+{
+	uint32 upper = val >> 32;
+	uint32 lower = (uint32)val;
+
+	pktbuf_put_uint32(buf, upper);
+	pktbuf_put_uint32(buf, lower);
+}
+
+/* append len raw bytes; does nothing if the buffer is (or becomes) failed */
+void pktbuf_put_bytes(PktBuf *buf, const void *data, int len)
+{
+	make_room(buf, len);
+	if (!buf->failed) {
+		memcpy(buf->buf + buf->write_pos, data, len);
+		buf->write_pos += len;
+	}
+}
+
+/* append a C string including its terminating zero byte */
+void pktbuf_put_string(PktBuf *buf, const char *str)
+{
+	int with_nul = strlen(str) + 1;
+
+	pktbuf_put_bytes(buf, str, with_nul);
+}
+
+/*
+ * Begin a packet: write its header with a zero length field and
+ * remember where that field is, so pktbuf_finish_packet() can fill
+ * it in later.
+ *
+ * type < 256:  one type byte, then the 4-byte length.
+ * type >= 256: the 4-byte length, then type as a 32-bit value.
+ */
+void pktbuf_start_packet(PktBuf *buf, int type)
+{
+	if (buf->failed)
+		return;
+
+	if (type >= 256) {
+		/* old-style packet: no type byte, code follows the length */
+		buf->pktlen_pos = buf->write_pos;
+		pktbuf_put_uint32(buf, 0);
+		pktbuf_put_uint32(buf, type);
+		return;
+	}
+
+	/* new-style packet: type byte first */
+	pktbuf_put_char(buf, type);
+	buf->pktlen_pos = buf->write_pos;
+	pktbuf_put_uint32(buf, 0);
+}
+
+/*
+ * Close the packet opened by pktbuf_start_packet(): store its
+ * length, counted from the start of the length field to the current
+ * write position, into the length field (most significant byte
+ * first).  Does nothing on a failed buffer.
+ */
+void pktbuf_finish_packet(PktBuf *buf)
+{
+	uint8 *field;
+	unsigned pktlen;
+
+	if (buf->failed)
+		return;
+
+	field = buf->buf + buf->pktlen_pos;
+	pktlen = buf->write_pos - buf->pktlen_pos;
+	buf->pktlen_pos = 0;
+
+	field[0] = (pktlen >> 24) & 255;
+	field[1] = (pktlen >> 16) & 255;
+	field[2] = (pktlen >> 8) & 255;
+	field[3] = pktlen & 255;
+}
+
+/*
+ * Write one complete packet of the given type.  pktdesc describes
+ * the variable arguments, one character per field:
+ *
+ *   c - char/byte   (passed as int)
+ *   h - uint16      (passed as int)
+ *   i - uint32      (passed as int)
+ *   q - uint64
+ *   s - C string
+ *   b - bytes       (two arguments: uint8 pointer, int length)
+ *
+ * Any other character ends in fatal().
+ */
+void pktbuf_write_generic(PktBuf *buf, int type, const char *pktdesc, ...)
+{
+	const char *fmt;
+	va_list args;
+
+	pktbuf_start_packet(buf, type);
+
+	va_start(args, pktdesc);
+	for (fmt = pktdesc; *fmt != '\0'; fmt++) {
+		switch (*fmt) {
+		case 'c':
+			pktbuf_put_char(buf, va_arg(args, int));
+			break;
+		case 'h':
+			pktbuf_put_uint16(buf, va_arg(args, int));
+			break;
+		case 'i':
+			pktbuf_put_uint32(buf, va_arg(args, int));
+			break;
+		case 'q':
+			pktbuf_put_uint64(buf, va_arg(args, uint64));
+			break;
+		case 's':
+			pktbuf_put_string(buf, va_arg(args, char *));
+			break;
+		case 'b': {
+			uint8 *bin = va_arg(args, uint8 *);
+			int len = va_arg(args, int);
+
+			pktbuf_put_bytes(buf, bin, len);
+			break;
+		}
+		default:
+			fatal("bad pktdesc: %s", pktdesc);
+		}
+	}
+	va_end(args);
+
+	/* fill in the real length */
+	pktbuf_finish_packet(buf);
+}
+
+
+/*
+ * Write a RowDescription ('T') packet.
+ *
+ * tupdesc holds one character per column; the varargs supply one
+ * column name (char *) per character:
+ *   'i' - int4   (INT4OID, size 4)
+ *   'q' - int8   (INT8OID, size 8)
+ *   's' - string (TEXTOID, size -1)
+ *   'T' - usec_t shown as date, described as text (TEXTOID, size -1)
+ *
+ * Any other character is reported through fatal().
+ */
+void pktbuf_write_RowDescription(PktBuf *buf, const char *tupdesc, ...)
+{
+	va_list ap;
+	int col;
+	int ncol = strlen(tupdesc);
+
+	log_noise("write RowDescription");
+
+	pktbuf_start_packet(buf, 'T');
+	pktbuf_put_uint16(buf, ncol);
+
+	va_start(ap, tupdesc);
+	for (col = 0; col < ncol; col++) {
+		char *colname = va_arg(ap, char *);
+
+		/* per-column fields: name, reloid, colnr, oid, typsize, typmod, fmt */
+		pktbuf_put_string(buf, colname);
+		pktbuf_put_uint32(buf, 0);
+		pktbuf_put_uint16(buf, 0);
+
+		switch (tupdesc[col]) {
+		case 's':
+		case 'T':
+			pktbuf_put_uint32(buf, TEXTOID);
+			pktbuf_put_uint16(buf, -1);
+			break;
+		case 'i':
+			pktbuf_put_uint32(buf, INT4OID);
+			pktbuf_put_uint16(buf, 4);
+			break;
+		case 'q':
+			pktbuf_put_uint32(buf, INT8OID);
+			pktbuf_put_uint16(buf, 8);
+			break;
+		default:
+			fatal("bad tupdesc");
+		}
+
+		pktbuf_put_uint32(buf, 0);
+		pktbuf_put_uint16(buf, 0);
+	}
+	va_end(ap);
+
+	/* patch the real length into the header */
+	pktbuf_finish_packet(buf);
+}
+
+/*
+ * Write a DataRow ('D') packet with all values in text form.
+ *
+ * tupdesc holds one character per column; the varargs supply one value
+ * per character:
+ *   'i' - int, printed with %d
+ *   'q' - uint64, printed with %llu
+ *   's' - char *, a NULL pointer is sent as SQL NULL (length -1)
+ *   'T' - usec_t, converted with format_date()
+ *
+ * Non-NULL values are written as a C string including the terminating
+ * NUL, and the length field counts that NUL too (strlen + 1).
+ *
+ * Any other character is reported through fatal().
+ */
+void pktbuf_write_DataRow(PktBuf *buf, const char *tupdesc, ...)
+{
+	char numbuf[32];
+	const char *val = NULL;
+	int col, len;
+	int ncol = strlen(tupdesc);
+	va_list ap;
+
+	pktbuf_start_packet(buf, 'D');
+	pktbuf_put_uint16(buf, ncol);
+
+	va_start(ap, tupdesc);
+	for (col = 0; col < ncol; col++) {
+		switch (tupdesc[col]) {
+		case 'i':
+			sprintf(numbuf, "%d", va_arg(ap, int));
+			val = numbuf;
+			break;
+		case 'q':
+			sprintf(numbuf, "%llu", (unsigned long long)va_arg(ap, uint64));
+			val = numbuf;
+			break;
+		case 's':
+			val = va_arg(ap, char *);
+			break;
+		case 'T':
+			{
+				usec_t when = va_arg(ap, usec_t);
+
+				val = format_date(when);
+			}
+			break;
+		default:
+			fatal("bad tupdesc: %s", tupdesc);
+		}
+
+		if (val == NULL) {
+			/* NULL */
+			pktbuf_put_uint32(buf, -1);
+			continue;
+		}
+
+		len = strlen(val);
+		pktbuf_put_uint32(buf, len + 1);
+		pktbuf_put_string(buf, val);
+	}
+	va_end(ap);
+
+	pktbuf_finish_packet(buf);
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Safe & easy creation of PostgreSQL packets.
+ *
+ * A PktBuf is a write cursor over a byte buffer.  The put/write
+ * functions append at write_pos; once 'failed' is set they stop
+ * writing, so callers can check the flag once at the end.
+ */
+
+typedef struct PktBuf PktBuf;
+struct PktBuf {
+ uint8 *buf;		/* start of the data area */
+ int buf_len;		/* presumably the size of the data area - TODO confirm in pktbuf.c */
+ int write_pos;		/* offset in buf where the next byte is appended */
+ int pktlen_pos;	/* offset of the length field of the packet being built */
+
+ int send_pos;		/* NOTE(review): looks like the offset of the next byte to send; confirm in pktbuf.c */
+ struct event *ev;	/* NOTE(review): presumably the event used for queued sending; confirm in pktbuf.c */
+
+ unsigned failed:1;	/* when set, the put/start/finish functions do nothing */
+ unsigned sending:1;	/* NOTE(review): presumably set while a queued send is in progress */
+ unsigned fixed_buf:1;	/* NOTE(review): presumably set for buffers created by pktbuf_static() */
+};
+
+/*
+ * pktbuf creation
+ *
+ * pktbuf_static() sets up a caller-owned PktBuf over the caller-supplied
+ * memory area data/len.  pktbuf_dynamic() returns a new PktBuf
+ * (presumably allocated with start_len bytes of initial room - TODO
+ * confirm in pktbuf.c).
+ */
+PktBuf *pktbuf_dynamic(int start_len);
+void pktbuf_static(PktBuf *buf, uint8 *data, int len);
+
+/*
+ * sending
+ *
+ * Callers in this code base treat a false return from
+ * pktbuf_send_immidiate() as "packet was not (fully) sent".
+ */
+bool pktbuf_send_immidiate(PktBuf *buf, PgSocket *sk);
+void pktbuf_send_queued(PktBuf *buf, PgSocket *sk);
+
+/*
+ * low-level ops
+ *
+ * pktbuf_start_packet() writes the header and pktbuf_finish_packet()
+ * patches the length in; the pktbuf_put_* calls in between append the
+ * payload.  pktbuf_written() gives the number of bytes appended so far.
+ */
+void pktbuf_start_packet(PktBuf *buf, int type);
+void pktbuf_put_char(PktBuf *buf, char val);
+void pktbuf_put_uint16(PktBuf *buf, uint16 val);
+void pktbuf_put_uint32(PktBuf *buf, uint32 val);
+void pktbuf_put_uint64(PktBuf *buf, uint64 val);
+void pktbuf_put_string(PktBuf *buf, const char *str);
+void pktbuf_put_bytes(PktBuf *buf, const void *data, int len);
+void pktbuf_finish_packet(PktBuf *buf);
+#define pktbuf_written(buf) ((buf)->write_pos)
+
+
+/*
+ * Packet writing
+ *
+ * Each call writes one whole packet (header, payload, length); the
+ * format string tells how to interpret the variable arguments.
+ */
+void pktbuf_write_generic(PktBuf *buf, int type, const char *fmt, ...);
+void pktbuf_write_RowDescription(PktBuf *buf, const char *tupdesc, ...);
+void pktbuf_write_DataRow(PktBuf *buf, const char *tupdesc, ...);
+
+/*
+ * Shortcuts for actual packets.
+ *
+ * Each macro expands to one pktbuf_write_generic() call with the
+ * packet type and payload format filled in.
+ */
+#define pktbuf_write_ParameterStatus(buf, key, val) \
+ pktbuf_write_generic(buf, 'S', "ss", key, val)
+
+/* payload is a single uint32 with value 0 */
+#define pktbuf_write_AuthenticationOk(buf) \
+ pktbuf_write_generic(buf, 'R', "i", 0)
+
+/* payload is the single status byte 'I' */
+#define pktbuf_write_ReadyForQuery(buf) \
+ pktbuf_write_generic(buf, 'Z', "c", 'I')
+
+#define pktbuf_write_CommandComplete(buf, desc) \
+ pktbuf_write_generic(buf, 'C', "s", desc)
+
+/* key points to 8 bytes that are copied as-is */
+#define pktbuf_write_BackendKeyData(buf, key) \
+ pktbuf_write_generic(buf, 'K', "b", key, 8)
+
+/* old-style packet; key points to 8 bytes that are copied as-is */
+#define pktbuf_write_CancelRequest(buf, key) \
+ pktbuf_write_generic(buf, PKT_CANCEL, "b", key, 8)
+
+/* old-style packet: raw parms bytes, then "user", the user name and an empty string */
+#define pktbuf_write_StartupMessage(buf, user, parms, parms_len) \
+ pktbuf_write_generic(buf, PKT_STARTUP, "bsss", parms, parms_len, "user", user, "")
+
+#define pktbuf_write_PasswordMessage(buf, psw) \
+ pktbuf_write_generic(buf, 'p', "s", psw)
+
+/*
+ * Shortcut for creating DataRow in memory.
+ *
+ * Builds one DataRow packet into the caller's memory dst/dstlen; args
+ * are the tupdesc string and values for pktbuf_write_DataRow().
+ * reslen receives the number of bytes written, or -1 if the packet
+ * buffer ended up in failed state.
+ *
+ * Declares a local named _buf, so args must not refer to a variable
+ * with that name.  Uses GNU named variadic macro syntax.
+ */
+
+#define BUILD_DataRow(reslen, dst, dstlen, args...) do { \
+ PktBuf _buf; \
+ pktbuf_static(&_buf, dst, dstlen); \
+ pktbuf_write_DataRow(&_buf, ## args); \
+ reslen = _buf.failed ? -1 : _buf.write_pos; \
+} while (0)
+
+/*
+ * Shortcuts for immediate send of one packet.
+ *
+ * SEND_wrap builds a single packet in an on-stack array of buflen
+ * bytes by calling pktfn(&_buf, args...), then hands it to
+ * pktbuf_send_immidiate() and stores that call's result in res.
+ *
+ * Declares locals named _data and _buf, so args must not refer to
+ * variables with those names.  Uses GNU named variadic macro syntax.
+ */
+
+#define SEND_wrap(buflen, pktfn, res, sk, args...) do { \
+ uint8 _data[buflen]; PktBuf _buf; \
+ pktbuf_static(&_buf, _data, sizeof(_data)); \
+ pktfn(&_buf, ## args); \
+ res = pktbuf_send_immidiate(&_buf, sk); \
+} while (0)
+
+/*
+ * One-shot senders built on SEND_wrap.  The first SEND_wrap argument
+ * is the size of the on-stack buffer the packet must fit into.
+ */
+#define SEND_RowDescription(res, sk, args...) \
+ SEND_wrap(512, pktbuf_write_RowDescription, res, sk, ## args)
+
+#define SEND_generic(res, sk, args...) \
+ SEND_wrap(512, pktbuf_write_generic, res, sk, ## args)
+
+#define SEND_ReadyForQuery(res, sk) \
+ SEND_wrap(8, pktbuf_write_ReadyForQuery, res, sk)
+
+#define SEND_CancelRequest(res, sk, key) \
+ SEND_wrap(16, pktbuf_write_CancelRequest, res, sk, key)
+
+#define SEND_PasswordMessage(res, sk, psw) \
+ SEND_wrap(512, pktbuf_write_PasswordMessage, res, sk, psw)
+
+
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Handling of pooler listening sockets
+ */
+
+#include "bouncer.h"
+
+/* listening sockets; 0 means "not opened" throughout this file */
+static int fd_net = 0;
+static int fd_unix = 0;
+/* accept events for the two listening sockets */
+static struct event ev_net;
+static struct event ev_unix;
+/* set by suspend_pooler(), cleared by resume_pooler() */
+static int suspended = 0;
+
+/* timer that resumes the pooler after accept() failure */
+static struct event ev_err;
+static struct timeval err_timeout = {5, 0};
+
+/*
+ * atexit() hook registered by create_unix_socket(): removes the unix
+ * socket file.  Does nothing when no unix socket dir is configured or
+ * while the pooler is suspended.
+ */
+static void cleanup_unix_socket(void)
+{
+	char path[256];
+
+	if (cf_unix_socket_dir && !suspended) {
+		snprintf(path, sizeof(path), "%s/.s.PGSQL.%d",
+			 cf_unix_socket_dir, cf_listen_port);
+		unlink(path);
+	}
+}
+
+/*
+ * Report the current listening sockets: *p_net gets the TCP one,
+ * *p_unix the unix one.  A value of 0 means that socket is not open.
+ */
+void get_pooler_fds(int *p_net, int *p_unix)
+{
+ *p_net = fd_net;
+ *p_unix = fd_unix;
+}
+
+/*
+ * Create the listening unix socket <socket_dir>/.s.PGSQL.<listen_port>.
+ *
+ * Refuses to continue (fatal) if a "<socket path>.lock" file exists.
+ * A stale socket file is removed before binding, and removal of the
+ * new one is scheduled via atexit().  The socket file is made
+ * accessible to everybody (mode 0777).
+ *
+ * Every failure is fatal; on return the socket is listening.
+ */
+static int create_unix_socket(const char *socket_dir, int listen_port)
+{
+	struct sockaddr_un addr;
+	struct stat lock_st;
+	char lock_path[256];
+	int fd;
+
+	/* build the socket address */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	snprintf(addr.sun_path, sizeof(addr.sun_path),
+		 "%s/.s.PGSQL.%d", socket_dir, listen_port);
+
+	/* an existing lockfile means somebody owns this port */
+	snprintf(lock_path, sizeof(lock_path), "%s.lock", addr.sun_path);
+	if (lstat(lock_path, &lock_st) == 0)
+		fatal("unix port %d is in use", listen_port);
+
+	/* expect old bouncer gone */
+	unlink(addr.sun_path);
+
+	fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		fatal_perror("socket");
+
+	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0)
+		fatal_perror("bind");
+
+	/* remove socket file on shutdown */
+	atexit(cleanup_unix_socket);
+
+	/* set common options */
+	tune_socket(fd, true);
+
+	/* finally, accept connections */
+	if (listen(fd, 100) < 0)
+		fatal_perror("listen");
+
+	if (chmod(addr.sun_path, 0777) < 0)
+		fatal_perror("chmod");
+
+	log_info("listening on unix:%s", addr.sun_path);
+
+	return fd;
+}
+
+/*
+ * Create the listening TCP socket.
+ *
+ * listen_addr is either "*" (all interfaces) or a dotted IPv4 address;
+ * listen_port is the port in host byte order.  SO_REUSEADDR is always
+ * set; TCP_DEFER_ACCEPT is set when the platform has it and
+ * cf_tcp_defer_accept is positive.
+ *
+ * Every failure is fatal; on return the socket is listening.
+ */
+static int create_net_socket(const char *listen_addr, int listen_port)
+{
+	int sock;
+	struct sockaddr_in sa;
+	int res;
+	int val;
+
+	/* create socket */
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock < 0)
+		fatal_perror("socket");
+
+	/*
+	 * parse address
+	 *
+	 * Use the function arguments, not the cf_* globals, so the socket
+	 * is bound to what the caller asked for.
+	 */
+	memset(&sa, 0, sizeof(sa));
+	sa.sin_family = AF_INET;
+	sa.sin_port = htons(listen_port);
+	if (strcmp(listen_addr, "*") == 0) {
+		sa.sin_addr.s_addr = htonl(INADDR_ANY);
+	} else {
+		sa.sin_addr.s_addr = inet_addr(listen_addr);
+		if (sa.sin_addr.s_addr == INADDR_NONE)
+			fatal("cannot parse addr: '%s'", listen_addr);
+	}
+
+	/* relaxed binding */
+	val = 1;
+	res = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+	if (res < 0)
+		fatal_perror("setsockopt");
+
+	/* bind to address */
+	res = bind(sock, (struct sockaddr *)&sa, sizeof(sa));
+	if (res < 0)
+		fatal_perror("bind");
+
+	/* set common options */
+	tune_socket(sock, false);
+
+#ifdef TCP_DEFER_ACCEPT
+	/*
+	 * Notify pooler only when data has also arrived.
+	 *
+	 * optval specifies how long after connection attempt to wait for data.
+	 *
+	 * Related to tcp_synack_retries sysctl, default 5 (corresponds 180 secs).
+	 */
+	if (cf_tcp_defer_accept > 0) {
+		val = cf_tcp_defer_accept;
+		res = setsockopt(sock, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, sizeof(val));
+		if (res < 0)
+			fatal_perror("setsockopt TCP_DEFER_ACCEPT");
+	}
+#endif
+
+	/* finally, accept connections */
+	res = listen(sock, 100);
+	if (res < 0)
+		fatal_perror("listen");
+
+	log_info("listening on %s:%d", listen_addr, listen_port);
+
+	return sock;
+}
+
+/*
+ * Timer callback armed by pool_accept() after accept() failure:
+ * starts listening again.  The arguments are not used.
+ */
+static void err_wait_func(int sock, short flags, void *arg)
+{
+ resume_pooler();
+}
+
+/*
+ * libevent callback for the listening sockets: got new connection,
+ * associate it with client struct.
+ *
+ * sock    - the listening socket that became readable
+ * flags   - libevent event flags (not used)
+ * is_unix - non-NULL when sock is the unix socket, NULL for TCP
+ *
+ * Transient accept() errors are ignored; the listening events are
+ * persistent, so the callback simply runs again on the next
+ * connection.  Any other error (most likely fd exhaustion) suspends
+ * the pooler and arms a timer that resumes it after err_timeout.
+ */
+static void
+pool_accept(int sock, short flags, void *is_unix)
+{
+	int fd;
+	union {
+		struct sockaddr_in in;
+		struct sockaddr_un un;
+		struct sockaddr sa;
+	} addr;
+	socklen_t len = sizeof(addr);
+
+	/* get fd */
+	fd = accept(sock, &addr.sa, &len);
+	if (fd < 0) {
+		/*
+		 * Nothing is wrong with the listener in these cases:
+		 * interrupted call, spurious wakeup, or the peer gave up
+		 * before we got to it.  No reason to stop listening.
+		 */
+		if (errno == EINTR || errno == EAGAIN
+		    || errno == EWOULDBLOCK || errno == ECONNABORTED)
+			return;
+
+		/*
+		 * probably fd limit, pointless to try often
+		 * wait a bit, hope that admin resolves somehow
+		 */
+		log_error("accept() failed: %s", strerror(errno));
+		suspend_pooler();
+		evtimer_set(&ev_err, err_wait_func, NULL);
+		evtimer_add(&ev_err, &err_timeout);
+		return;
+	}
+
+	log_noise("new fd from accept=%d", fd);
+	if (is_unix) {
+		log_debug("P: new unix client");
+		{
+			/* diagnostics only: log our uid and the peer's */
+			uid_t uid;
+			log_noise("getuid(): %d", (int)getuid());
+			if (get_unix_peer_uid(fd, &uid))
+				log_noise("unix peer uid: %d", (int)uid);
+			else
+				log_noise("unix peer uid failed");
+		}
+		accept_client(fd, NULL, true);
+	} else {
+		log_debug("P: new tcp client");
+		accept_client(fd, &addr.in, false);
+	}
+}
+
+/*
+ * Adopt an already opened listening socket instead of creating one.
+ * The socket gets the common options applied and is remembered as the
+ * unix or the TCP listener, depending on is_unix.  Always returns true.
+ */
+bool
+use_pooler_socket(int sock, bool is_unix)
+{
+	int *slot = is_unix ? &fd_unix : &fd_net;
+
+	tune_socket(sock, is_unix);
+	*slot = sock;
+
+	return true;
+}
+
+/*
+ * Stop accepting new connections: marks the pooler suspended and
+ * removes the accept events of the open listening sockets.  The
+ * sockets themselves stay open.
+ */
+void
+suspend_pooler(void)
+{
+	suspended = 1;
+
+	if (fd_net != 0) {
+		event_del(&ev_net);
+	}
+	if (fd_unix != 0) {
+		event_del(&ev_unix);
+	}
+}
+
+/*
+ * Start (or restart) accepting connections: clears the suspended flag
+ * and registers a persistent read event with pool_accept() for every
+ * open listening socket.
+ */
+void
+resume_pooler(void)
+{
+	suspended = 0;
+
+	/* a non-NULL callback arg tells pool_accept() it is the unix socket */
+	if (fd_unix != 0) {
+		event_set(&ev_unix, fd_unix, EV_READ | EV_PERSIST, pool_accept, "1");
+		event_add(&ev_unix, NULL);
+	}
+
+	/* NULL callback arg: TCP socket */
+	if (fd_net != 0) {
+		event_set(&ev_net, fd_net, EV_READ | EV_PERSIST, pool_accept, NULL);
+		event_add(&ev_net, NULL);
+	}
+}
+
+/*
+ * Open the configured listening sockets and start accepting.
+ * Should happen after all other initializations.
+ *
+ * Sockets that are already set (e.g. through use_pooler_socket()) are
+ * kept as they are.  It is fatal to end up with no socket at all.
+ */
+void
+pooler_setup(void)
+{
+	if (!fd_net && cf_listen_addr) {
+		fd_net = create_net_socket(cf_listen_addr, cf_listen_port);
+	}
+
+	if (!fd_unix && cf_unix_socket_dir) {
+		fd_unix = create_unix_socket(cf_unix_socket_dir, cf_listen_port);
+	}
+
+	if (fd_net == 0 && fd_unix == 0)
+		fatal("nowhere to listen on");
+
+	resume_pooler();
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* open configured listening sockets and start accepting */
+void pooler_setup(void);
+/* adopt an already open listening socket */
+bool use_pooler_socket(int fd, bool is_unix);
+/* start / stop accepting connections on the listening sockets */
+void resume_pooler(void);
+void suspend_pooler(void);
+/* report listening sockets, 0 means not open */
+void get_pooler_fds(int *p_net, int *p_unix);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Pieces that need to have detailed info about protocol.
+ */
+
+#include "bouncer.h"
+
+/*
+ * parse protocol header from MBuf
+ */
+
+/*
+ * Parse a packet header from the buffer.
+ *
+ * Regular packet: <type byte != 0><uint32 length>; the reported length
+ * is the length field plus one, i.e. it covers the type byte too.
+ *
+ * Special packet (first two bytes zero): <uint16 length><uint32 code>
+ * follow; the code selects PKT_CANCEL, PKT_SSLREQ or PKT_STARTUP
+ * (protocol major 3, minor 0 or 1).
+ *
+ * Returns false if there are too few bytes or the packet is not
+ * recognized; *pkt_type_p and *pkt_len_p are set only on success.
+ * Bytes already read from pkt stay consumed in either case.
+ */
+bool get_header(MBuf *pkt, unsigned *pkt_type_p, unsigned *pkt_len_p)
+{
+	unsigned type, len, code;
+
+	if (mbuf_avail(pkt) < 5) {
+		log_noise("get_header: less then 5 bytes available");
+		return false;
+	}
+
+	type = mbuf_get_char(pkt);
+	if (type == 0) {
+		/* special packet: second byte must be zero too */
+		if (mbuf_get_char(pkt) != 0) {
+			log_noise("get_header: unknown special pkt");
+			return false;
+		}
+		/* dont tolerate partial pkt */
+		if (mbuf_avail(pkt) < 6) {
+			log_noise("get_header: less that 6 bytes for special pkt");
+			return false;
+		}
+		len = mbuf_get_uint16(pkt);
+		code = mbuf_get_uint32(pkt);
+
+		switch (code) {
+		case 80877102:
+			type = PKT_CANCEL;
+			break;
+		case 80877103:
+			type = PKT_SSLREQ;
+			break;
+		default:
+			if ((code >> 16) == 3 && (code & 0xFFFF) < 2) {
+				type = PKT_STARTUP;
+				break;
+			}
+			log_noise("get_header: unknown special pkt: len=%u code=%u", len, code);
+			return false;
+		}
+	} else {
+		/* regular packet */
+		len = mbuf_get_uint32(pkt) + 1;
+	}
+
+	*pkt_type_p = type;
+	*pkt_len_p = len;
+	return true;
+}
+
+
+/*
+ * Log msg and send it to the client as an ErrorResponse ('E') packet
+ * with severity "ERROR" and code "08P01".  When send_ready is set, a
+ * ReadyForQuery packet is appended.  Both are built in a 512-byte
+ * on-stack buffer and sent in one go.
+ *
+ * Returns the result of pktbuf_send_immidiate().
+ */
+
+bool send_pooler_error(PgSocket *client, bool send_ready, const char *msg)
+{
+	PktBuf pkt;
+	uint8 storage[512];
+
+	slog_error(client, "Pooler Error: %s", msg);
+
+	pktbuf_static(&pkt, storage, sizeof(storage));
+
+	/* fields: Severity, Code, Message, then the terminating zero byte */
+	pktbuf_write_generic(&pkt, 'E', "cscscsc",
+			     'S', "ERROR", 'C', "08P01", 'M', msg, 0);
+	if (send_ready) {
+		pktbuf_write_ReadyForQuery(&pkt);
+	}
+
+	return pktbuf_send_immidiate(&pkt, client);
+}
+
+/*
+ * Parse server error message and log it.
+ *
+ * pkt is expected to hold the error fields: a type byte followed by a
+ * C string, repeated, ending with a zero byte.  Only the 'S' and 'M'
+ * fields are used.  If either is missing, the message is logged as
+ * corrupt.
+ */
+void log_server_error(const char *note, MBuf *pkt)
+{
+	const char *level = NULL;
+	const char *msg = NULL;
+	const char *field;
+	int code;
+
+	for (;;) {
+		if (!mbuf_avail(pkt))
+			break;
+
+		code = mbuf_get_char(pkt);
+		if (code == 0)
+			break;
+
+		field = mbuf_get_string(pkt);
+		if (!field)
+			break;
+
+		switch (code) {
+		case 'S':
+			level = field;
+			break;
+		case 'M':
+			msg = field;
+			break;
+		}
+	}
+
+	if (msg && level)
+		log_error("%s: %s: %s", note, level, msg);
+	else
+		log_error("%s: corrupt error message", note);
+}
+
+
+/*
+ * Preparation of welcome message for client connection.
+ */
+
+/*
+ * Add another server ParameterStatus packet to the per-database
+ * welcome message cache.
+ *
+ * The first parameter added is preceded by an AuthenticationOk packet.
+ * Once the welcome message is marked ready, further parameters are
+ * ignored and true is returned.
+ *
+ * Returns false when the packet is incomplete or broken, or when the
+ * parameter does not fit into the remaining cache space; in the latter
+ * case the cache is left as it was, so no partial packet ever becomes
+ * part of the welcome message.
+ */
+bool add_welcome_parameter(PgSocket *server,
+			   unsigned pkt_type, unsigned pkt_len, MBuf *pkt)
+{
+	PgDatabase *db = server->pool->db;
+	PktBuf msg;
+	const char *key, *val;
+
+	if (db->welcome_msg_ready)
+		return true;
+
+	/* incomplete startup msg from server? */
+	if (pkt_len - 5 > mbuf_avail(pkt))
+		return false;
+
+	/* write directly into the unused tail of the cache */
+	pktbuf_static(&msg, db->welcome_msg + db->welcome_msg_len,
+		      sizeof(db->welcome_msg) - db->welcome_msg_len);
+
+	if (db->welcome_msg_len == 0)
+		pktbuf_write_AuthenticationOk(&msg);
+
+	key = mbuf_get_string(pkt);
+	val = mbuf_get_string(pkt);
+	if (!key || !val) {
+		log_error("broken ParameterStatus packet");
+		return false;
+	}
+	log_debug("S: param: %s = %s", key, val);
+	pktbuf_write_ParameterStatus(&msg, key, val);
+
+	/*
+	 * On overflow the buffer is flagged failed and write_pos may
+	 * cover a half-written packet; do not account for it.
+	 */
+	if (msg.failed) {
+		log_error("welcome message buffer too small for parameter: %s", key);
+		return false;
+	}
+	db->welcome_msg_len += pktbuf_written(&msg);
+
+	return true;
+}
+
+/*
+ * All parameters processed: mark the cached welcome message of the
+ * server's database as ready, unless it already is.
+ */
+void finish_welcome_msg(PgSocket *server)
+{
+	PgDatabase *db = server->pool->db;
+
+	if (!db->welcome_msg_ready)
+		db->welcome_msg_ready = 1;
+}
+
+/*
+ * Send the cached welcome message to a client, followed by a fresh
+ * BackendKeyData (random 8-byte cancel key, stored in the client) and
+ * ReadyForQuery.  Everything is assembled in a 1024-byte on-stack
+ * buffer and sent in one go.
+ *
+ * Returns false if the welcome message is not ready yet.  Returns true
+ * otherwise, even when sending failed; that case is only logged.
+ */
+bool welcome_client(PgSocket *client)
+{
+	PgDatabase *db = client->pool->db;
+	uint8 storage[1024];
+	PktBuf msg;
+
+	log_noise("P: welcome_client");
+	if (!db->welcome_msg_ready)
+		return false;
+
+	pktbuf_static(&msg, storage, sizeof(storage));
+	pktbuf_put_bytes(&msg, db->welcome_msg, db->welcome_msg_len);
+
+	/* give each client its own cancel key */
+	get_random_bytes(client->cancel_key, 8);
+	pktbuf_write_BackendKeyData(&msg, client->cancel_key);
+	pktbuf_write_ReadyForQuery(&msg);
+
+	/* send all together */
+	if (!pktbuf_send_immidiate(&msg, client))
+		log_warning("unhandled failure to send welcome_msg");
+
+	return true;
+}
+
+/*
+ * Password authentication for server
+ */
+
+/*
+ * Send a PasswordMessage carrying enc_psw to the server.
+ * If it could not be sent immediately, the server is disconnected.
+ */
+static void send_password(PgSocket *server, const char *enc_psw)
+{
+	bool sent;
+
+	SEND_PasswordMessage(sent, server, enc_psw);
+	if (sent)
+		return;
+
+	disconnect_server(server, true,
+			  "partial send unhandled in send_password");
+}
+
+/* Answer a cleartext password request: send the stored password as is. */
+static void login_clear_psw(PgSocket *server)
+{
+	PgUser *user = server->pool->user;
+
+	log_debug("P: send clear password");
+	send_password(server, user->passwd);
+}
+
+/*
+ * Answer a crypt password request: run the stored password through
+ * pg_crypt() with the 2-byte salt from the server and send the result.
+ *
+ * salt points to the 2 salt bytes inside the packet; they are not
+ * NUL-terminated there.
+ */
+static void login_crypt_psw(PgSocket *server, const uint8 *salt)
+{
+	char saltbuf[3];
+	const char *enc;
+	PgUser *user = server->pool->user;
+
+	log_debug("P: send crypt password");
+
+	/*
+	 * strncpy() does not terminate the destination when the source
+	 * has no NUL within the first 2 bytes, so do it by hand.
+	 */
+	strncpy(saltbuf, (char *)salt, 2);
+	saltbuf[2] = '\0';
+
+	enc = pg_crypt(user->passwd, saltbuf);
+	send_password(server, enc);
+}
+
+
+/*
+ * Answer an md5 password request.
+ *
+ * If the stored password is not in md5 form already (per isMD5()), it
+ * is first hashed together with the user name.  Then the part after
+ * the 3-character prefix (presumably "md5") is hashed with the 4-byte
+ * salt from the server, and the result is sent.
+ */
+static void login_md5_psw(PgSocket *server, const uint8 *salt)
+{
+	PgUser *user = server->pool->user;
+	char txt[MD5_PASSWD_LEN + 1];
+	char *src;
+
+	log_debug("P: send md5 password");
+
+	if (isMD5(user->passwd)) {
+		/* stored value is hashed already */
+		src = user->passwd + 3;
+	} else {
+		pg_md5_encrypt(user->passwd, user->name, strlen(user->name), txt);
+		src = txt + 3;
+	}
+	pg_md5_encrypt(src, (char *)salt, 4, txt);
+
+	send_password(server, txt);
+}
+
+/*
+ * Answer server authentication request.
+ *
+ * pkt_len is the full packet length as reported by get_header(); pkt
+ * is positioned at the start of the payload.  Supported requests:
+ *   0 - auth ok, nothing to send
+ *   3 - cleartext password
+ *   4 - crypt password, 2-byte salt follows
+ *   5 - md5 password, 4-byte salt follows
+ *
+ * Returns false if the packet is too short or not fully available,
+ * and also for unsupported or unknown methods, because the login
+ * cannot proceed without an answer.
+ */
+bool answer_authreq(PgSocket *server,
+		    unsigned pkt_type, unsigned pkt_len,
+		    MBuf *pkt)
+{
+	unsigned cmd;
+	const uint8 *salt;
+
+	/* header plus the uint32 request code */
+	if (pkt_len < 5 + 4)
+		return false;
+	if (mbuf_avail(pkt) < pkt_len - 5)
+		return false;
+
+	cmd = mbuf_get_uint32(pkt);
+	switch (cmd) {
+	case 0:
+		log_debug("S: auth ok");
+		break;
+	case 3:
+		log_debug("S: req cleartext password");
+		login_clear_psw(server);
+		break;
+	case 4:
+		if (pkt_len < 5 + 4 + 2)
+			return false;
+		log_debug("S: req crypt psw");
+		salt = mbuf_get_bytes(pkt, 2);
+		login_crypt_psw(server, salt);
+		break;
+	case 5:
+		if (pkt_len < 5 + 4 + 4)
+			return false;
+		log_debug("S: req md5-crypted psw");
+		salt = mbuf_get_bytes(pkt, 4);
+		login_md5_psw(server, salt);
+		break;
+	case 2: /* kerberos */
+	case 6: /* scm something */
+		log_error("unsupported auth method: %u", cmd);
+		return false;
+	default:
+		log_error("unknown auth method: %u", cmd);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Build a StartupMessage for the server connection (the database's
+ * prepared startup parameters plus the pool's user name) in a 512-byte
+ * on-stack buffer and send it.
+ *
+ * Returns the result of pktbuf_send_immidiate().
+ */
+bool send_startup_packet(PgSocket *server)
+{
+	uint8 storage[512];
+	PktBuf pkt;
+	PgDatabase *db = server->pool->db;
+	const char *username = server->pool->user->name;
+
+	pktbuf_static(&pkt, storage, sizeof(storage));
+	pktbuf_write_StartupMessage(&pkt, username,
+				    db->startup_params,
+				    db->startup_params_len);
+
+	return pktbuf_send_immidiate(&pkt, server);
+}
+
+/*
+ * Parse the payload of a text-format DataRow packet.
+ *
+ * tupdesc holds one character per expected column; the varargs supply
+ * one destination pointer per character:
+ *   'i' - int *    (parsed with atoi)
+ *   'q' - uint64 * (parsed with strtoull, base 10)
+ *   's' - char **  (set to point into pkt, or NULL for SQL NULL)
+ *
+ * Values are used in place, so numeric parsing relies on the sender
+ * including a terminating NUL in each value, as pktbuf_write_DataRow()
+ * does.
+ *
+ * Returns the number of columns.  A column count mismatch, a bad
+ * tupdesc character, a missing value in a numeric column and leftover
+ * data are all fatal.
+ */
+int scan_text_result(MBuf *pkt, const char *tupdesc, ...)
+{
+	char *val = NULL;
+	int len;
+	unsigned ncol, i;
+	va_list ap;
+
+	ncol = mbuf_get_uint16(pkt);
+	if (ncol != strlen(tupdesc))
+		fatal("different number of cols");
+
+	va_start(ap, tupdesc);
+	for (i = 0; i < ncol; i++) {
+		/* negative length means SQL NULL */
+		len = mbuf_get_uint32(pkt);
+		if (len < 0)
+			val = NULL;
+		else
+			val = (char *)mbuf_get_bytes(pkt, len);
+
+		if (tupdesc[i] == 'i') {
+			int *dst_p = va_arg(ap, int *);
+			/* a number cannot be parsed out of nothing */
+			if (!val)
+				fatal("scan_text_result: no value for int4 column");
+			*dst_p = atoi(val);
+		} else if (tupdesc[i] == 'q') {
+			uint64 *dst_p = va_arg(ap, uint64 *);
+			if (!val)
+				fatal("scan_text_result: no value for int8 column");
+			/* unsigned parse, covers the full uint64 range */
+			*dst_p = strtoull(val, NULL, 10);
+		} else if (tupdesc[i] == 's') {
+			char **dst_p = va_arg(ap, char **);
+			*dst_p = val;
+		} else
+			fatal("bad tupdesc: %s", tupdesc);
+	}
+	va_end(ap);
+
+	if (mbuf_avail(pkt))
+		fatal("scan_text_result: unparsed data");
+
+	return ncol;
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* parse packet header from buffer, false if it cannot be parsed */
+bool get_header(MBuf *pkt, unsigned *pkt_type_p, unsigned *pkt_len_p);
+
+/* error reporting: to the client / from the server into the log */
+bool send_pooler_error(PgSocket *client, bool send_ready, const char *msg);
+void log_server_error(const char *note, MBuf *pkt);
+
+/* per-database cache of the welcome message sent to new clients */
+bool add_welcome_parameter(PgSocket *server, unsigned pkt_type, unsigned pkt_len, MBuf *pkt);
+void finish_welcome_msg(PgSocket *server);
+bool welcome_client(PgSocket *client);
+
+/* answer an authentication request packet from the server */
+bool answer_authreq(PgSocket *server, unsigned pkt_type, unsigned pkt_len, MBuf *pkt);
+
+/* send StartupMessage on a new server connection */
+bool send_startup_packet(PgSocket *server);
+
+/* parse text DataRow payload into the given destinations, returns column count */
+int scan_text_result(MBuf *pkt, const char *tupdesc, ...);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stream buffer
+ *
+ * The task is to copy data from one socket to another
+ * efficiently, while allowing callbacks to look
+ * at packet headers.
+ */
+
+#include "bouncer.h"
+
+/*
+ * If no more than this amount of data is pending, prefer to merge
+ * it with the next recv() instead of handling it right away.
+ */
+#define SMALL_PKT 16
+
+/* forward declarations of file-local functions */
+static void sbuf_queue_send(SBuf *sbuf);
+static bool sbuf_send_pending(SBuf *sbuf);
+static bool sbuf_process_pending(SBuf *sbuf);
+static void sbuf_connect_cb(int sock, short flags, void *arg);
+static void sbuf_recv_cb(int sock, short flags, void *arg);
+static void sbuf_send_cb(int sock, short flags, void *arg);
+static void sbuf_try_resync(SBuf *sbuf);
+static void sbuf_wait_for_data(SBuf *sbuf);
+
+/*
+ * Invoke the protocol callback, handing it an MBuf that covers the
+ * received-but-unprocessed bytes (from pkt_pos up to recv_pos).
+ *
+ * A true result means the callback used one of sbuf_prepare_* on the
+ * sbuf, and processing can continue.
+ *
+ * A false result means it used sbuf_pause(), sbuf_close() or simply
+ * wants to wait for the next event loop (eg. too little data
+ * available).  The code that invoked the callback should not touch
+ * the sbuf in that case and just return to libevent.
+ */
+static inline bool sbuf_call_proto(SBuf *sbuf, int event)
+{
+	MBuf view;
+	int nbytes = sbuf->recv_pos - sbuf->pkt_pos;
+	uint8 *start = sbuf->buf + sbuf->pkt_pos;
+
+	/* the window must lie inside the buffer ... */
+	Assert(nbytes >= 0);
+	Assert(start + nbytes <= sbuf->buf + cf_sbuf_len);
+	/* ... and a read event needs something to read */
+	Assert(event != SBUF_EV_READ || nbytes > 0);
+
+	mbuf_init(&view, start, nbytes);
+
+	return sbuf->proto_handler(sbuf, event, &view, sbuf->arg);
+}
+
+/*
+ * Wait for new data: (re)arm the sbuf's event as a persistent read
+ * event on its own socket, handled by sbuf_recv_cb(), without timeout.
+ */
+static void sbuf_wait_for_data(SBuf *sbuf)
+{
+ event_set(&sbuf->ev, sbuf->sock, EV_READ | EV_PERSIST, sbuf_recv_cb, sbuf);
+ event_add(&sbuf->ev, NULL);
+}
+
+/*
+ * Initialize an SBuf: everything is zeroed, then the protocol callback
+ * and the argument passed back to it are stored.
+ */
+void sbuf_init(SBuf *sbuf, sbuf_proto_cb_t proto_fn, void *arg)
+{
+	memset(sbuf, 0, sizeof(*sbuf));
+
+	sbuf->proto_handler = proto_fn;
+	sbuf->arg = arg;
+}
+
+/*
+ * Attach a socket obtained from accept() to a clean sbuf.
+ *
+ * The socket gets the common options applied.  Unless cf_reboot is
+ * set, the sbuf starts waiting for data; for TCP sockets with
+ * cf_tcp_defer_accept enabled, the receive callback is also run right
+ * away, as the socket should already have some data (linux only).
+ */
+void sbuf_accept(SBuf *sbuf, int sock, bool is_unix)
+{
+	/* sbuf must be unused */
+	Assert(sbuf->pkt_pos == 0);
+	Assert(sbuf->recv_pos == 0);
+	Assert(sbuf->send_pos == 0);
+
+	tune_socket(sock, is_unix);
+	sbuf->sock = sock;
+	sbuf->is_unix = is_unix;
+
+	if (cf_reboot)
+		return;
+
+	sbuf_wait_for_data(sbuf);
+
+	if (!is_unix && cf_tcp_defer_accept)
+		sbuf_recv_cb(sbuf->sock, EV_READ, sbuf);
+}
+
+/*
+ * Create a socket and start connecting it to addr.
+ *
+ * For unix addresses the path is <cf_unix_socket_dir>/.s.PGSQL.<port>.
+ *
+ * Outcomes:
+ *  - connect() succeeds at once: sbuf_connect_cb() is called directly
+ *  - connect() is in progress: sbuf_connect_cb() is registered for
+ *    writability, with a timeout of timeout_sec seconds
+ *  - socket() or connect() fails: the protocol callback is told
+ *    SBUF_EV_CONNECT_FAILED
+ */
+void sbuf_connect(SBuf *sbuf, const PgAddr *addr, int timeout_sec)
+{
+	int res, sock, domain;
+	int saved_errno;
+	struct sockaddr_in sa_in;
+	struct sockaddr_un sa_un;
+	struct sockaddr *sa;
+	socklen_t len;
+	struct timeval timeout;
+
+	/* prepare sockaddr */
+	if (addr->is_unix) {
+		sa = (void*)&sa_un;
+		len = sizeof(sa_un);
+		memset(sa, 0, len);
+		sa_un.sun_family = AF_UNIX;
+		snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),
+			 "%s/.s.PGSQL.%d", cf_unix_socket_dir, addr->port);
+		domain = AF_UNIX;
+	} else {
+		sa = (void*)&sa_in;
+		len = sizeof(sa_in);
+		memset(sa, 0, len);
+		sa_in.sin_family = AF_INET;
+		sa_in.sin_addr = addr->ip_addr;
+		sa_in.sin_port = htons(addr->port);
+		domain = AF_INET;
+	}
+
+	/*
+	 * common stuff
+	 */
+	sock = socket(domain, SOCK_STREAM, 0);
+	if (sock < 0) {
+		/* probably fd limit, try to survive */
+		log_error("sbuf_connect: socket() failed: %s", strerror(errno));
+		sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED);
+		return;
+	}
+
+	tune_socket(sock, addr->is_unix);
+
+	sbuf->is_unix = addr->is_unix;
+	sbuf->sock = sock;
+
+	timeout.tv_sec = timeout_sec;
+	timeout.tv_usec = 0;
+
+	/*
+	 * launch connection
+	 *
+	 * errno is saved right away: the logging call below may do I/O
+	 * and overwrite it before it is examined.
+	 */
+	res = connect(sock, sa, len);
+	saved_errno = errno;
+	log_noise("connect(%d)=%d", sock, res);
+	if (res == 0) {
+		/* unix socket gives connection immediately */
+		sbuf_connect_cb(sock, EV_WRITE, sbuf);
+	} else if (res < 0 && saved_errno == EINPROGRESS) {
+		/* tcp socket needs waiting */
+		event_set(&sbuf->ev, sock, EV_WRITE, sbuf_connect_cb, sbuf);
+		event_add(&sbuf->ev, &timeout);
+	} else {
+		/* failure */
+		log_warning("connect failed: res=%d/err=%s", res, strerror(saved_errno));
+		close(sock);
+		sbuf->sock = 0;
+		sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED);
+	}
+}
+
+/*
+ * Stop waiting for data on this socket: removes the sbuf's event.
+ * Must not be used while a send is queued (wait_send set).
+ */
+void sbuf_pause(SBuf *sbuf)
+{
+ Assert(sbuf->wait_send == 0);
+
+ event_del(&sbuf->ev);
+}
+
+/*
+ * Resume from pause: start waiting for data again, then run the
+ * receive callback once right away.
+ */
+void sbuf_continue(SBuf *sbuf)
+{
+ sbuf_wait_for_data(sbuf);
+
+ /* there is some data already received */
+ sbuf_recv_cb(sbuf->sock, EV_READ, sbuf);
+}
+
+/*
+ * Resume from pause and give socket over to external
+ * callback function.
+ *
+ * user_cb is registered as a persistent read event handler on the
+ * sbuf's socket, without timeout.  It will be called with the arg
+ * given to sbuf_init, not with the sbuf itself.
+ */
+void sbuf_continue_with_callback(SBuf *sbuf, sbuf_libevent_cb user_cb)
+{
+ event_set(&sbuf->ev, sbuf->sock, EV_READ | EV_PERSIST,
+ user_cb, sbuf->arg);
+ event_add(&sbuf->ev, NULL);
+}
+
+/*
+ * Socket cleanup & close.
+ *
+ * If a socket is attached, its event is removed and the socket closed.
+ * All positions, counters and flags are reset; the protocol handler
+ * and its argument are kept, so the sbuf can be reused.
+ */
+void sbuf_close(SBuf *sbuf)
+{
+	if (sbuf->sock > 0) {
+		event_del(&sbuf->ev);
+		safe_close(sbuf->sock);
+	}
+
+	sbuf->dst = NULL;
+	sbuf->sock = 0;
+
+	/* receive / packet state */
+	sbuf->recv_pos = 0;
+	sbuf->pkt_remain = 0;
+	sbuf->pkt_pos = 0;
+
+	/* flags */
+	sbuf->pkt_flush = 0;
+	sbuf->wait_send = 0;
+	sbuf->pkt_skip = 0;
+
+	/* send state */
+	sbuf->send_remain = 0;
+	sbuf->send_pos = 0;
+}
+
+/*
+ * Called by the protocol callback: the next 'amount' bytes of input
+ * are to be forwarded to dst.  With flush set, the data is sent as
+ * soon as it has been walked over instead of being merged with later
+ * sends.
+ *
+ * May only be used when the previous block is fully walked
+ * (pkt_remain == 0); amount must be positive.
+ */
+void sbuf_prepare_send(SBuf *sbuf, SBuf *dst, unsigned amount, bool flush)
+{
+	Assert(amount > 0);
+	Assert(sbuf->pkt_remain == 0);
+	Assert(sbuf->pkt_skip == 0 || sbuf->send_remain == 0);
+	Assert(!sbuf->pkt_flush || sbuf->send_remain == 0);
+
+	sbuf->dst = dst;
+	sbuf->pkt_remain = amount;
+	sbuf->pkt_flush = flush;
+	sbuf->pkt_skip = 0;
+}
+
+/*
+ * Called by the protocol callback: the next 'amount' bytes of input
+ * are to be dropped instead of forwarded.
+ *
+ * May only be used when the previous block is fully walked
+ * (pkt_remain == 0); amount must be positive.
+ */
+void sbuf_prepare_skip(SBuf *sbuf, int amount)
+{
+	Assert(amount > 0);
+	Assert(sbuf->pkt_remain == 0);
+	Assert(sbuf->pkt_skip == 0 || sbuf->send_remain == 0);
+	Assert(!sbuf->pkt_flush || sbuf->send_remain == 0);
+
+	sbuf->dst = NULL;
+	sbuf->pkt_remain = amount;
+	sbuf->pkt_flush = 0;
+	sbuf->pkt_skip = 1;
+}
+
+/*
+ * libevent EV_WRITE callback: the destination socket is writable
+ * again.  Clears the wait_send flag and continues processing pending
+ * data; if that went through, goes back to waiting for input.
+ */
+static void sbuf_send_cb(int sock, short flags, void *arg)
+{
+	SBuf *sbuf = arg;
+
+	sbuf->wait_send = 0;
+
+	if (sbuf_process_pending(sbuf))
+		sbuf_wait_for_data(sbuf);
+}
+
+/*
+ * Destination socket is full, wait until it is writable again.
+ *
+ * The sbuf's single event is switched from its current use to a
+ * one-shot write event on the destination's socket, handled by
+ * sbuf_send_cb().  wait_send stays set until that callback runs.
+ */
+static void sbuf_queue_send(SBuf *sbuf)
+{
+ sbuf->wait_send = 1;
+ event_del(&sbuf->ev);
+ event_set(&sbuf->ev, sbuf->dst->sock, EV_WRITE, sbuf_send_cb, sbuf);
+ event_add(&sbuf->ev, NULL);
+}
+
+/*
+ * There is data in the buffer to be sent.  Returns true if processing
+ * can continue, false if the caller has to stop.
+ *
+ * Does not look at pkt_pos/remain fields, expects them to be merged to
+ * send_*.
+ *
+ * After a partial send the send is retried until the socket reports
+ * EAGAIN; then the send is queued and false is returned.  Any other
+ * send error is reported to the protocol callback as
+ * SBUF_EV_SEND_FAILED, also returning false.
+ */
+static bool sbuf_send_pending(SBuf *sbuf)
+{
+	int sent, todo;
+
+	for (;;) {
+		/* how much data is available for sending */
+		todo = sbuf->recv_pos - sbuf->send_pos;
+		if (todo > sbuf->send_remain)
+			todo = sbuf->send_remain;
+		if (todo == 0)
+			return true;
+
+		/* actually send it */
+		sent = safe_send(sbuf->dst->sock, sbuf->buf + sbuf->send_pos, todo, 0);
+		if (sent < 0)
+			break;
+
+		sbuf->send_remain -= sent;
+		sbuf->send_pos += sent;
+
+		if (!(sent < todo))
+			return true;
+
+		/*
+		 * Partial send.  Instead of queueing right away, go
+		 * around once more and run into EAGAIN to be sure.
+		 */
+	}
+
+	if (errno == EAGAIN)
+		sbuf_queue_send(sbuf);
+	else
+		sbuf_call_proto(sbuf, SBUF_EV_SEND_FAILED);
+
+	return false;
+}
+
+/*
+ * Process as much data as possible.
+ *
+ * Walks the received bytes block by block.  At the start of each
+ * block the protocol callback decides (via sbuf_prepare_*) how many
+ * bytes follow and whether they are forwarded or skipped.  Bytes to be
+ * forwarded are merged into the send_pos/send_remain window; the
+ * window is sent when a skip or flush block is reached and once more
+ * at the end.
+ *
+ * Returns false as soon as the protocol callback or a send returns
+ * false; the caller should then stop touching the sbuf.  Otherwise
+ * returns the result of the final send.
+ */
+static bool sbuf_process_pending(SBuf *sbuf)
+{
+ int avail;
+ /* buffer completely filled by recv? */
+ bool full = sbuf->recv_pos == cf_sbuf_len;
+ bool res;
+
+ while (1) {
+ Assert(sbuf->recv_pos >= sbuf->pkt_pos);
+
+ /*
+ * Enough for now?
+ *
+ * The (avail <= SMALL_PKT) check is to avoid partial pkts.
+ * As SBuf should not assume knowledge about packets,
+ * the check is not done in !full case. Packet handler can
+ * then still notify about partial packet by returning false.
+ */
+ avail = sbuf->recv_pos - sbuf->pkt_pos;
+ if (avail == 0 || (full && avail <= SMALL_PKT))
+ break;
+
+ /* handle proto if start of packet */
+ if (sbuf->pkt_remain == 0) { /* start of new block */
+ res = sbuf_call_proto(sbuf, SBUF_EV_READ);
+ if (!res)
+ return false;
+ /* the handler must have called sbuf_prepare_* */
+ Assert(sbuf->pkt_remain > 0);
+ }
+
+ /* walk pkt, merge sends */
+ if (avail > sbuf->pkt_remain)
+ avail = sbuf->pkt_remain;
+ if (!sbuf->pkt_skip) {
+ /* nothing pending: the send window starts at this block */
+ if (sbuf->send_remain == 0)
+ sbuf->send_pos = sbuf->pkt_pos;
+ sbuf->send_remain += avail;
+ }
+ sbuf->pkt_remain -= avail;
+ sbuf->pkt_pos += avail;
+
+ /* send data */
+ if (sbuf->pkt_skip || sbuf->pkt_flush) {
+ res = sbuf_send_pending(sbuf);
+ if (!res)
+ return false;
+ }
+ }
+
+ return sbuf_send_pending(sbuf);
+}
+
+/*
+ * Reposition at buffer start again.
+ *
+ * If nothing unprocessed/unsent is left, all positions are reset to 0.
+ * If only a small tail (<= SMALL_PKT bytes) is left, it is moved to the
+ * start of the buffer.  Larger tails are left where they are.
+ */
+static void sbuf_try_resync(SBuf *sbuf)
+{
+ int avail;
+
+ /* nothing processed since the last resync */
+ if (sbuf->pkt_pos == 0)
+ return;
+
+ /* data that must be kept starts at send_pos if a send is pending,
+ otherwise at pkt_pos */
+ if (sbuf->send_remain > 0)
+ avail = sbuf->recv_pos - sbuf->send_pos;
+ else
+ avail = sbuf->recv_pos - sbuf->pkt_pos;
+
+ if (avail == 0) {
+ sbuf->recv_pos = sbuf->pkt_pos = sbuf->send_pos = 0;
+ } else if (avail <= SMALL_PKT) {
+ if (sbuf->send_remain > 0) {
+ /* keep pkt_pos at the same distance from send_pos */
+ memmove(sbuf->buf, sbuf->buf + sbuf->send_pos, avail);
+ sbuf->pkt_pos -= sbuf->send_pos;
+ sbuf->send_pos = 0;
+ sbuf->recv_pos = avail;
+ } else {
+ memmove(sbuf->buf, sbuf->buf + sbuf->pkt_pos, avail);
+ sbuf->send_pos = 0;
+ sbuf->pkt_pos = 0;
+ sbuf->recv_pos = avail;
+ }
+ }
+}
+
+/*
+ * Ask the kernel for up to 'len' more bytes, appended at recv_pos.
+ *
+ * Returns true when reading may go on (data was read, or the socket
+ * had nothing to give right now).  On EOF or a real error the proto
+ * handler is told SBUF_EV_RECV_FAILED and false is returned.
+ */
+static bool sbuf_actual_recv(SBuf *sbuf, int len)
+{
+	uint8 *dst = sbuf->buf + sbuf->recv_pos;
+	int nread = safe_recv(sbuf->sock, dst, len, 0);
+
+	if (nread > 0) {
+		sbuf->recv_pos += nread;
+		return true;
+	}
+
+	/* we tried too much, socket is empty: act as if zero bytes were read */
+	if (nread < 0 && errno == EAGAIN)
+		return true;
+
+	/* eof from socket (0) or some other error (< 0) */
+	sbuf_call_proto(sbuf, SBUF_EV_RECV_FAILED);
+	return false;
+}
+
+/*
+ * Callback for libevent EV_READ.
+ *
+ * Makes room in the buffer, reads more data if there is enough free
+ * space, then processes what is buffered.  Repeats as long as
+ * processing succeeds and the buffer is completely full.
+ * 'sock' and 'flags' are unused, 'arg' is the SBuf.
+ */
+static void sbuf_recv_cb(int sock, short flags, void *arg)
+{
+ int free, ok;
+ SBuf *sbuf = arg;
+
+ /* reading should be disabled when waiting */
+ Assert(sbuf->wait_send == 0);
+
+try_more:
+ /* make room in buffer */
+ sbuf_try_resync(sbuf);
+
+ /*
+ * FIXME: When called from sbuf_continue(), there is already
+ * data waiting. Thus there will be unnecessary recv().
+ */
+ free = cf_sbuf_len - sbuf->recv_pos;
+ /* skip the recv() when only a small amount of space is left */
+ if (free > SMALL_PKT) {
+ ok = sbuf_actual_recv(sbuf, free);
+ if (!ok)
+ return;
+ }
+
+ /* now handle it */
+ ok = sbuf_process_pending(sbuf);
+
+ /* if the buffer is full, there can be more data available */
+ if (ok && sbuf->recv_pos == cf_sbuf_len)
+ goto try_more;
+}
+
+/*
+ * Query SO_ERROR on the socket after a connect attempt.
+ *
+ * Returns true when no error is pending.  Returns false, after logging,
+ * when getsockopt() itself fails or the socket reports an error.
+ */
+static bool sbuf_after_connect_check(SBuf *sbuf)
+{
+	int so_err = 0;
+	socklen_t so_len = sizeof(so_err);
+
+	if (getsockopt(sbuf->sock, SOL_SOCKET, SO_ERROR, (void*)&so_err, &so_len) < 0) {
+		log_error("sbuf_after_connect_check: getsockopt: %s",
+			  strerror(errno));
+		return false;
+	}
+
+	if (so_err == 0)
+		return true;
+
+	log_error("sbuf_after_connect_check: pending error: %s",
+		  strerror(so_err));
+	return false;
+}
+
+/*
+ * Callback for libevent EV_WRITE while connecting.
+ *
+ * The connect counts as successful only if the event is EV_WRITE and
+ * the socket has no pending error; anything else (including a timeout)
+ * is reported as SBUF_EV_CONNECT_FAILED.  On success the proto handler
+ * gets SBUF_EV_CONNECT_OK and, if it returns true, reading is started.
+ */
+static void sbuf_connect_cb(int sock, short flags, void *arg)
+{
+	SBuf *sbuf = arg;
+	bool connected = (flags & EV_WRITE) && sbuf_after_connect_check(sbuf);
+
+	if (!connected) {
+		/* EV_TIMEOUT or pending socket error */
+		sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED);
+		return;
+	}
+
+	if (sbuf_call_proto(sbuf, SBUF_EV_CONNECT_OK))
+		sbuf_wait_for_data(sbuf);
+}
+
+/*
+ * Send 'len' bytes from 'buf' directly to the sbuf's own socket.
+ *
+ * Returns true only when the whole buffer was sent by a single send.
+ * Failures and partial sends are logged; nothing is retried.
+ * A socket value <= 0 is treated as "not connected".
+ */
+bool sbuf_answer(SBuf *sbuf, const void *buf, int len)
+{
+	int sent;
+
+	if (sbuf->sock <= 0)
+		return false;
+
+	sent = safe_send(sbuf->sock, buf, len, 0);
+	if (sent >= 0 && sent == len)
+		return true;
+
+	if (sent < 0)
+		log_error("sbuf_answer: error sending: %s", strerror(errno));
+	else
+		log_error("sbuf_answer: partial send: len=%d sent=%d", len, sent);
+	return sent == len;
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* event types an SBuf reports to its protocol callback */
+typedef enum {
+ SBUF_EV_READ, /* buffered data at the start of a new packet */
+ SBUF_EV_RECV_FAILED, /* recv() returned EOF or an error */
+ SBUF_EV_SEND_FAILED, /* send() to the destination failed */
+ SBUF_EV_CONNECT_FAILED, /* connect timed out or socket has an error */
+ SBUF_EV_CONNECT_OK /* connect finished without error */
+} SBufEvent;
+
+/* forward declaration, the struct is defined below */
+typedef struct SBuf SBuf;
+
+/*
+ * Protocol callback.
+ *
+ * Callback should return true if it used one of sbuf_prepare_* on sbuf,
+ * false if it used sbuf_pause(), sbuf_close() or simply wants to wait for
+ * next event loop (e.g. too little data available).
+ */
+typedef bool (*sbuf_proto_cb_t)(SBuf *sbuf,
+ SBufEvent evtype,
+ MBuf *mbuf,
+ void *arg);
+
+/* for some reason, libevent has no typedef for callback */
+typedef void (*sbuf_libevent_cb)(int, short, void *);
+
+/*
+ * Stream buffer: reads from one socket and forwards (or skips) the
+ * data packet by packet, as directed by the protocol callback.
+ */
+struct SBuf {
+ /* libevent handle */
+ struct event ev;
+
+ /* protocol callback function */
+ sbuf_proto_cb_t proto_handler;
+ /* NOTE(review): presumably the opaque pointer handed to proto_handler - confirm */
+ void *arg;
+
+ /* fd for this socket */
+ int sock;
+
+ /* dest SBuf for current packet */
+ SBuf *dst;
+
+ /* offsets into buf[] */
+ unsigned recv_pos; /* end of received data, next recv() writes here */
+ unsigned pkt_pos; /* end of data already assigned to a packet */
+ unsigned pkt_remain; /* bytes of the current packet not yet walked */
+ unsigned send_pos; /* next byte to be sent to dst */
+ unsigned send_remain; /* bytes still waiting to be sent */
+
+ unsigned wait_send:1; /* waiting for EV_WRITE on the dst socket */
+ unsigned pkt_skip:1; /* current packet is skipped, not forwarded */
+ unsigned pkt_flush:1; /* current packet must be sent right away */
+ unsigned is_unix:1; /* NOTE(review): presumably set for unix sockets - confirm */
+
+ /* data buffer, allocated together with the struct (zero-length array) */
+ uint8 buf[0];
+};
+
+/* fd of the socket this sbuf reads from */
+#define sbuf_socket(sbuf) ((sbuf)->sock)
+
+/* setup */
+void sbuf_init(SBuf *sbuf, sbuf_proto_cb_t proto_fn, void *arg);
+void sbuf_accept(SBuf *sbuf, int read_sock, bool is_unix);
+void sbuf_connect(SBuf *sbuf, const PgAddr *addr, int timeout_sec);
+
+/* flow control */
+void sbuf_pause(SBuf *sbuf);
+void sbuf_continue(SBuf *sbuf);
+void sbuf_close(SBuf *sbuf);
+
+/* proto_fn can use those functions to order behaviour */
+void sbuf_prepare_send(SBuf *sbuf, SBuf *dst, unsigned amount, bool flush);
+void sbuf_prepare_skip(SBuf *sbuf, int amount);
+
+/* send data directly to the sbuf's own socket; true only if all of it was sent */
+bool sbuf_answer(SBuf *sbuf, const void *buf, int len);
+
+/* NOTE(review): presumably like sbuf_continue() but with a custom libevent callback - confirm */
+void sbuf_continue_with_callback(SBuf *sbuf, sbuf_libevent_cb cb);
+
+/*
+ * True when no packet is in progress and everything received so far
+ * has been sent (send_pos has caught up with recv_pos).
+ */
+static inline bool sbuf_empty(SBuf *sbuf)
+{
+	if (sbuf->send_pos != sbuf->recv_pos)
+		return false;
+	return sbuf->pkt_remain == 0;
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Handling of server connections
+ */
+
+#include "bouncer.h"
+
+/*
+ * Process packets on server auth phase.
+ *
+ * Requires the whole packet to be in the buffer.  Returns true when the
+ * packet was handled and is to be skipped (sbuf_prepare_skip), false
+ * when the server was disconnected or the handler reported failure.
+ */
+static bool handle_server_startup(PgSocket *server, MBuf *pkt)
+{
+ unsigned pkt_type;
+ unsigned pkt_len;
+ SBuf *sbuf = &server->sbuf;
+ bool res = false;
+
+ if (!get_header(pkt, &pkt_type, &pkt_len)) {
+ disconnect_server(server, true, "bad pkt in login phase");
+ return false;
+ }
+
+ /* the +5 accounts for the header that get_header() has consumed */
+ if (pkt_len > mbuf_avail(pkt) + 5) {
+ disconnect_server(server, true, "partial pkt in login phase");
+ return false;
+ }
+
+ log_noise("S: pkt '%c', len=%d", pkt_type, pkt_len);
+
+ switch (pkt_type) {
+ default:
+ slog_error(server, "unknown pkt from server: '%c'", pkt_type);
+ disconnect_server(server, true, "unknown pkt from server");
+ break;
+ case 'E': /* ErrorResponse */
+ log_server_error("S: login failed", pkt);
+ disconnect_server(server, true, "login failed");
+ break;
+
+ /* packets that need closer look */
+ case 'R': /* AuthenticationXXX */
+ log_debug("calling login_answer");
+ res = answer_authreq(server, pkt_type, pkt_len, pkt);
+ break;
+ case 'S': /* ParameterStatus */
+ res = add_welcome_parameter(server, pkt_type, pkt_len, pkt);
+ break;
+ case 'Z': /* ReadyForQuery */
+ /* login ok */
+ log_debug("server login ok, start accepting queries");
+ server->ready = 1;
+
+ finish_welcome_msg(server);
+ release_server(server);
+
+ /* let the takeover process handle it */
+ if (server->pool->admin)
+ takeover_login(server);
+ res = true;
+ break;
+
+ /* ignorable packets */
+ case 'K': /* BackendKeyData */
+ /* remember the 8-byte cancel key if it is fully present */
+ if (mbuf_avail(pkt) >= 8)
+ memcpy(server->cancel_key, mbuf_get_bytes(pkt, 8), 8);
+ res = true;
+ break;
+ case 'N': /* NoticeResponse */
+ slog_noise(server, "skipping pkt: %c", pkt_type);
+ res = true;
+ break;
+ }
+
+ /* login-phase packets are consumed here and not forwarded */
+ if (res)
+ sbuf_prepare_skip(sbuf, pkt_len);
+
+ return res;
+}
+
+/*
+ * Process packets on logged in connection.
+ *
+ * Decides per packet type whether it is forwarded to the linked client
+ * (optionally flushed at once) or skipped, updates the stats and
+ * releases the server when it reports being idle.
+ *
+ * The switch relies on deliberate fall-through: each group of cases
+ * adds its own handling and then continues into the groups below it.
+ *
+ * Returns false when the server was disconnected or a partial packet
+ * must be waited for, true otherwise.
+ */
+static bool handle_server_work(PgSocket *server, MBuf *pkt)
+{
+ unsigned pkt_type;
+ unsigned pkt_len;
+ bool flush = 0;
+ bool ready = 0;
+ char state;
+ SBuf *sbuf = &server->sbuf;
+ PgSocket *client = server->link;
+
+ Assert(!server->pool->admin);
+
+ if (!get_header(pkt, &pkt_type, &pkt_len)) {
+ disconnect_server(server, true, "bad pkt header");
+ return false;
+ }
+ slog_noise(server, "pkt='%c' len=%d", pkt_type, pkt_len);
+
+ switch (pkt_type) {
+ default:
+ slog_error(server, "unknown pkt: '%c'", pkt_type);
+ disconnect_server(server, true, "unknown pkt");
+ return false;
+
+ /* pooling decisions will be based on this packet */
+ case 'Z': /* ReadyForQuery */
+
+ /* if partial pkt, wait */
+ if (mbuf_avail(pkt) == 0)
+ return false;
+ state = mbuf_get_char(pkt);
+
+ /* set ready only if no tx */
+ if (state == 'I')
+ ready = 1;
+ else if (cf_pool_mode == POOL_STMT) {
+ disconnect_server(server, true,
+ "Long transactions not allowed");
+ return false;
+ }
+
+ /* fall through */
+ case 'E': /* ErrorResponse */
+ case 'N': /* NoticeResponse */
+
+ /* above packets need to be sent immediately */
+ flush = 1;
+
+ /* fall through */
+ /*
+ * chat packets, but server (and thus pooler)
+ * is allowed to buffer them until Sync or Flush
+ * is sent by client.
+ */
+ case '2': /* BindComplete */
+ case '3': /* CloseComplete */
+ case 'c': /* CopyDone(F/B) */
+ case 'f': /* CopyFail(F/B) */
+ case 'I': /* EmptyQueryResponse == CommandComplete */
+ case 'V': /* FunctionCallResponse */
+ case 'n': /* NoData */
+ case 'G': /* CopyInResponse */
+ case 'H': /* CopyOutResponse */
+ case '1': /* ParseComplete */
+ case 'A': /* NotificationResponse */
+ case 's': /* PortalSuspended */
+ case 'C': /* CommandComplete */
+
+ /* check if client wanted immediate response */
+ if (client && client->flush_req) {
+ flush = 1;
+ client->flush_req = 0;
+ }
+
+ /* fall through */
+ /* data packets, there will be more coming */
+ case 'd': /* CopyData(F/B) */
+ case 'D': /* DataRow */
+ case 't': /* ParameterDescription */
+ case 'S': /* ParameterStatus */
+ case 'T': /* RowDescription */
+
+ if (client) {
+ sbuf_prepare_send(sbuf, &client->sbuf, pkt_len, flush);
+ } else {
+ /* no client linked: drop the packet, warn unless state is SV_TESTED */
+ if (server->state != SV_TESTED)
+ log_warning("got packet '%c' from server"
+ " when not linked", pkt_type);
+ sbuf_prepare_skip(sbuf, pkt_len);
+ }
+ break;
+ }
+ server->ready = ready;
+
+ /* update stats */
+ server->pool->stats.server_bytes += pkt_len;
+ if (server->ready && client) {
+ usec_t total;
+ Assert(client->query_start != 0);
+
+ /* query duration is measured from client->query_start */
+ total = get_time_usec() - client->query_start;
+ client->query_start = 0;
+ server->pool->stats.query_time += total;
+ slog_debug(client, "query time: %d us", (int)total);
+ }
+
+ /* in session pooling only a SV_TESTED server is released here */
+ if (ready && ( cf_pool_mode != POOL_SESSION
+ || server->state == SV_TESTED))
+ release_server(server);
+
+ return true;
+}
+
+/*
+ * Connection to server established, decide what to use it for.
+ *
+ * If the pool has a cancel request queued, the fresh connection is used
+ * to forward it and is then closed (returns false).  Otherwise the
+ * startup packet is sent; the result of that send is returned and the
+ * server is disconnected on failure.
+ */
+static bool handle_connect(PgSocket *server)
+{
+	PgPool *pool = server->pool;
+	bool ok;
+
+	if (statlist_empty(&pool->cancel_req_list)) {
+		/* proceed with login */
+		ok = send_startup_packet(server);
+		if (!ok)
+			disconnect_server(server, false, "startup pkt failed");
+		return ok;
+	}
+
+	slog_debug(server, "use it for pending cancel req");
+	/* if pending cancel req, send it */
+	forward_cancel_request(server);
+	/* notify disconnect_server() that connect did not fail */
+	server->ready = 1;
+	disconnect_server(server, false, "sent cancel req");
+	return false;
+}
+
+/*
+ * Callback from SBuf for server connections.
+ *
+ * Dispatches on the event type; for SBUF_EV_READ the packet goes to the
+ * login-phase or the working-phase handler depending on server state.
+ * Returns the handler's result, false for all failure events.
+ */
+bool server_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg)
+{
+ bool res = false;
+ PgSocket *server = arg;
+
+ Assert(is_server_socket(server));
+ Assert(server->state != SV_FREE);
+
+ switch (evtype) {
+ case SBUF_EV_RECV_FAILED:
+ disconnect_server(server, false, "server conn crashed?");
+ break;
+ case SBUF_EV_SEND_FAILED:
+ /* sending to the linked client failed, so drop the client */
+ disconnect_client(server->link, false, "unexpected eof");
+ break;
+ case SBUF_EV_READ:
+ /* need at least the 5-byte header before looking at the packet */
+ if (mbuf_avail(pkt) < 5) {
+ log_noise("S: got partial header, trying to wait a bit");
+ return false;
+ }
+
+ server->request_time = get_cached_time();
+ switch (server->state) {
+ case SV_LOGIN:
+ res = handle_server_startup(server, pkt);
+ break;
+ case SV_TESTED:
+ case SV_USED:
+ case SV_ACTIVE:
+ case SV_IDLE:
+ res = handle_server_work(server, pkt);
+ break;
+ default:
+ fatal("server_proto: server in bad state: %d", server->state);
+ }
+ break;
+ case SBUF_EV_CONNECT_FAILED:
+ Assert(server->state == SV_LOGIN);
+ disconnect_server(server, false, "connect failed");
+ break;
+ case SBUF_EV_CONNECT_OK:
+ log_debug("S: connect ok");
+ Assert(server->state == SV_LOGIN);
+ server->request_time = get_cached_time();
+ res = handle_connect(server);
+ }
+ return res;
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* SBuf protocol callback for server connections; 'arg' is the server PgSocket */
+bool server_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bouncer.h"
+
+/* timer event that runs refresh_stats() periodically */
+static struct event ev_stats;
+/* timestamps of the previous and the latest stats refresh */
+static usec_t old_stamp, new_stamp;
+
+/* set all four counters of 'stat' to zero */
+static void reset_stats(PgStats *stat)
+{
+	stat->request_count = 0;
+	stat->client_bytes = 0;
+	stat->server_bytes = 0;
+	stat->query_time = 0;
+}
+
+/* add every counter of 'stat' onto the matching counter of 'total' */
+static void stat_add(PgStats *total, PgStats *stat)
+{
+	total->request_count += stat->request_count;
+	total->client_bytes += stat->client_bytes;
+	total->server_bytes += stat->server_bytes;
+	total->query_time += stat->query_time;
+}
+
+/*
+ * Fill 'avg' with per-second rates computed from the difference between
+ * 'cur' and 'old' over the time passed since old_stamp.
+ *
+ * query_time is the exception: it becomes the average time per request
+ * in the interval.  'avg' stays all-zero when no time has passed.
+ */
+static void calc_average(PgStats *avg, PgStats *cur, PgStats *old)
+{
+	usec_t elapsed = get_cached_time() - old_stamp;
+	uint64 nreq;
+
+	reset_stats(avg);
+
+	if (elapsed > 0) {
+		avg->request_count = USEC * (cur->request_count - old->request_count) / elapsed;
+		avg->client_bytes = USEC * (cur->client_bytes - old->client_bytes) / elapsed;
+		avg->server_bytes = USEC * (cur->server_bytes - old->server_bytes) / elapsed;
+
+		/* avoid dividing by zero when no request finished in the interval */
+		nreq = cur->request_count - old->request_count;
+		if (nreq > 0)
+			avg->query_time = (cur->query_time - old->query_time) / nreq;
+	}
+}
+
+/*
+ * Append one DataRow for 'dbname' to 'buf': the four total counters
+ * from 'stat', followed by the four averages computed against 'old'.
+ */
+static void write_stats(PktBuf *buf, PgStats *stat, PgStats *old, char *dbname)
+{
+ PgStats avg;
+ calc_average(&avg, stat, old);
+ /* column order must match the RowDescription written by the caller */
+ pktbuf_write_DataRow(buf, "sqqqqqqqq", dbname,
+ stat->request_count, stat->client_bytes,
+ stat->server_bytes, stat->query_time,
+ avg.request_count, avg.client_bytes,
+ avg.server_bytes, avg.query_time);
+}
+
+/*
+ * Send per-database statistics to an admin client.
+ *
+ * Pools are summed per database; a row is written each time the
+ * database changes while walking the list, plus one for the last one.
+ * This only groups correctly if pools of the same database are adjacent
+ * in 'pool_list' - NOTE(review): confirm the list is kept in that order.
+ *
+ * Always returns true, even when the buffer could not be allocated
+ * (the client gets an error message in that case).
+ */
+bool admin_database_stats(PgSocket *client, StatList *pool_list)
+{
+ PgPool *pool;
+ List *item;
+ PgDatabase *cur_db = NULL;
+ PgStats st_total, st_db, old_db, old_total;
+ /* NOTE(review): 'rows' is incremented but never read in this function */
+ int rows = 0;
+ PktBuf *buf;
+
+ reset_stats(&st_total);
+ reset_stats(&st_db);
+ reset_stats(&old_db);
+ reset_stats(&old_total);
+
+ buf = pktbuf_dynamic(512);
+ if (!buf) {
+ admin_error(client, "no mem");
+ return true;
+ }
+
+ pktbuf_write_RowDescription(buf, "sqqqqqqqq", "database",
+ "total_requests", "total_received",
+ "total_sent", "total_query_time",
+ "avg_req", "avg_recv", "avg_sent",
+ "avg_query");
+ statlist_for_each(item, pool_list) {
+ pool = container_of(item, PgPool, head);
+
+ if (!cur_db)
+ cur_db = pool->db;
+
+ /* database changed: emit the row collected so far and start over */
+ if (pool->db != cur_db) {
+ write_stats(buf, &st_db, &old_db, cur_db->name);
+
+ rows ++;
+ cur_db = pool->db;
+ stat_add(&st_total, &st_db);
+ stat_add(&old_total, &old_db);
+ reset_stats(&st_db);
+ reset_stats(&old_db);
+ }
+
+ stat_add(&st_db, &pool->stats);
+ stat_add(&old_db, &pool->older_stats);
+ }
+ /* row for the last database */
+ if (cur_db) {
+ write_stats(buf, &st_db, &old_db, cur_db->name);
+ /* NOTE(review): the totals are accumulated but never written out */
+ stat_add(&st_total, &st_db);
+ stat_add(&old_total, &old_db);
+ rows ++;
+ }
+ admin_flush(client, buf, "SHOW");
+
+ return true;
+}
+
+/*
+ * Periodic timer callback.
+ *
+ * Rotates the stats snapshots of every pool (older <- newer <- current),
+ * re-arms the timer for another cf_stats_period seconds and logs the
+ * averages summed over all pools.  The libevent arguments are unused.
+ */
+static void refresh_stats(int s, short flags, void *arg)
+{
+	List *item;
+	PgPool *pool;
+	struct timeval period = { cf_stats_period, 0 };
+	PgStats old_total, cur_total, avg;
+
+	reset_stats(&old_total);
+	reset_stats(&cur_total);
+
+	old_stamp = new_stamp;
+	new_stamp = get_cached_time();
+
+	statlist_for_each(item, &pool_list) {
+		pool = container_of(item, PgPool, head);
+		pool->older_stats = pool->newer_stats;
+		pool->newer_stats = pool->stats;
+
+		stat_add(&cur_total, &pool->stats);
+		stat_add(&old_total, &pool->older_stats);
+	}
+	evtimer_add(&ev_stats, &period);
+
+	calc_average(&avg, &cur_total, &old_total);
+
+	/*
+	 * send totals to logfile
+	 *
+	 * %llu expects unsigned long long, but a 64-bit counter can be
+	 * plain unsigned long (LP64), so the arguments are cast explicitly.
+	 */
+	log_info("Stats: %llu req/s, in %llu b/s, "
+		 "out %llu b/s, query %llu us",
+		 (unsigned long long)avg.request_count,
+		 (unsigned long long)avg.client_bytes,
+		 (unsigned long long)avg.server_bytes,
+		 (unsigned long long)avg.query_time);
+}
+
+/*
+ * Initialize the refresh timestamps and start the periodic
+ * refresh_stats() timer with a period of cf_stats_period seconds.
+ */
+void stats_setup(void)
+{
+	struct timeval period = { cf_stats_period, 0 };
+	usec_t now = get_time_usec();
+
+	/* pretend the previous refresh happened one second ago */
+	new_stamp = now;
+	old_stamp = now - USEC;
+
+	/* launch maintenance */
+	evtimer_set(&ev_stats, refresh_stats, NULL);
+	evtimer_add(&ev_stats, &period);
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* start the periodic statistics refresh timer */
+void stats_setup(void);
+
+/* send per-database statistics rows to an admin client; always returns true */
+bool admin_database_stats(PgSocket *client, StatList *pool_list);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Required system headers
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "../config.h"
+#endif
+
+#define _GNU_SOURCE
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <limits.h>
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#ifdef HAVE_CRYPT_H
+#include <crypt.h>
+#endif
+
+/* Assert() is active only when built with CASSERT; otherwise it expands to nothing */
+#ifdef CASSERT
+#define Assert(e) do { if (!(e)) fatal("Assert(%s) failed", #e); } while (0)
+#else
+#define Assert(e)
+#endif
+
+/* fall back to the runtime limit when the platform has no OPEN_MAX constant */
+#ifndef OPEN_MAX
+#define OPEN_MAX sysconf(_SC_OPEN_MAX)
+#endif
+
+/* how many microseconds in a second */
+#define USEC (1000000LL)
+
+/* own boolean type; NOTE(review): would clash with <stdbool.h> if that is ever included */
+typedef enum { false=0, true=1 } bool;
+
+/* short names for the fixed-width unsigned integer types */
+typedef uint8_t uint8;
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+
+
+/* NOTE(review): presumably PostgreSQL type OIDs for int8, int4 and text - confirm */
+#define INT8OID 20
+#define INT4OID 23
+#define TEXTOID 25
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Connect to running bouncer process, load fds from it, shut it down
+ * and continue with them.
+ *
+ * Each row from SHOW FDS will have corresponging fd in ancillary message.
+ *
+ * Manpages: unix, sendmsg, recvmsg, cmsg, readv
+ */
+
+#include "bouncer.h"
+
+/*
+ * Takeover done, old process shut down,
+ * kick this one running.
+ *
+ * Drops the connection to the old bouncer, clears cf_reboot and
+ * calls resume_all().
+ */
+static void takeover_finish(PgSocket *bouncer)
+{
+ disconnect_server(bouncer, false, "disko over");
+ cf_reboot = 0;
+ resume_all();
+}
+
+/*
+ * Parse msg for fd and info.
+ *
+ * 'cmsg' must carry one file descriptor (SCM_RIGHTS) and 'pkt' the row
+ * that describes it.  The fd is handed over to the client, server or
+ * pooler code depending on the row's "task" column.
+ *
+ * Any malformed input is fatal; the function only ever returns true.
+ */
+static bool takeover_load_fd(MBuf *pkt, const struct cmsghdr *cmsg)
+{
+ int fd;
+ char *task, *s_addr, *user, *db;
+ int oldfd, port, linkfd;
+ uint64 ckey;
+ PgAddr addr;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (cmsg->cmsg_level == SOL_SOCKET
+ && cmsg->cmsg_type == SCM_RIGHTS
+ && cmsg->cmsg_len >= CMSG_LEN(sizeof(int)))
+ {
+ /* get the fd */
+ memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
+ log_debug("got fd: %d", fd);
+ } else
+ fatal("broken fd packet");
+
+ /* parse row contents */
+ /* NOTE(review): the result of scan_text_result() is not checked - confirm it cannot fail silently */
+ scan_text_result(pkt, "issssiqi", &oldfd, &task, &user, &db,
+ &s_addr, &port, &ckey, &linkfd);
+ if (task == NULL || s_addr == NULL)
+ fatal("NULL data from old process");
+
+ /* user and db may be NULL, task and s_addr may not */
+ log_debug("FD row: fd=%d(%d) linkfd=%d task=%s user=%s db=%s",
+ oldfd, fd, linkfd, task,
+ user ? user : "NULL",
+ db ? db : "NULL");
+
+ /* fill address */
+ addr.is_unix = strcmp(s_addr, "unix") == 0 ? true : false;
+ if (addr.is_unix) {
+ /* the row's port is ignored for unix sockets */
+ addr.port = cf_listen_port;
+ } else {
+ addr.ip_addr.s_addr = inet_addr(s_addr);
+ addr.port = port;
+ }
+
+ /* decide what to do with it */
+ if (strcmp(task, "client") == 0)
+ use_client_socket(fd, &addr, db, user, ckey, oldfd, linkfd);
+ else if (strcmp(task, "server") == 0)
+ use_server_socket(fd, &addr, db, user, ckey, oldfd, linkfd);
+ else if (strcmp(task, "pooler") == 0)
+ use_pooler_socket(fd, addr.is_unix);
+ else
+ fatal("unknown task: %s", task);
+
+ return true;
+}
+
+/*
+ * Re-create the client<->server link after takeover.
+ *
+ * Looks through the pool's active servers for the one whose old fd
+ * equals the client's link fd and links the two.  Not finding a
+ * partner is fatal.
+ */
+static void takeover_create_link(PgPool *pool, PgSocket *client)
+{
+	List *item;
+	PgSocket *server;
+
+	statlist_for_each(item, &pool->active_server_list) {
+		server = container_of(item, PgSocket, head);
+		if (server->tmp_sk_oldfd != client->tmp_sk_linkfd)
+			continue;
+
+		client->link = server;
+		server->link = client;
+		return;
+	}
+	fatal("takeover_create_link: failed to find pair");
+}
+
+/*
+ * Clean the inappropriate places the old fds got stored in.
+ *
+ * For every suspended socket on 'list' both tmp_sk_* fields are
+ * overwritten with the current cached time.
+ * NOTE(review): presumably tmp_sk_oldfd/tmp_sk_linkfd alias timestamp
+ * fields of PgSocket, which is why a time value is stored - confirm.
+ */
+static void takeover_clean_socket_list(StatList *list)
+{
+ List *item;
+ PgSocket *sk;
+ statlist_for_each(item, list) {
+ sk = container_of(item, PgSocket, head);
+ if (sk->suspended) {
+ sk->tmp_sk_oldfd = get_cached_time();
+ sk->tmp_sk_linkfd = get_cached_time();
+ }
+ }
+}
+
+/*
+ * All fds loaded: first link suspended clients to their servers in
+ * every non-admin pool, then wipe the temporary fd fields in all pools.
+ *
+ * The cleanup runs as a separate pass because linking reads the
+ * temporary fields of both clients and servers.
+ */
+static void takeover_postprocess_fds(void)
+{
+	List *pool_item, *client_item;
+	PgSocket *client;
+	PgPool *pool;
+
+	/* pass 1: create links */
+	statlist_for_each(pool_item, &pool_list) {
+		pool = container_of(pool_item, PgPool, head);
+		if (!pool->admin) {
+			statlist_for_each(client_item, &pool->active_client_list) {
+				client = container_of(client_item, PgSocket, head);
+				if (!client->suspended || !client->tmp_sk_linkfd)
+					continue;
+				takeover_create_link(pool, client);
+			}
+		}
+	}
+
+	/* pass 2: clean temporary fields */
+	statlist_for_each(pool_item, &pool_list) {
+		pool = container_of(pool_item, PgPool, head);
+		takeover_clean_socket_list(&pool->active_client_list);
+		takeover_clean_socket_list(&pool->active_server_list);
+		takeover_clean_socket_list(&pool->idle_server_list);
+	}
+}
+
+/*
+ * React to a CommandComplete from the old bouncer by sending the next
+ * step of the takeover sequence: SUSPEND -> SHOW FDS -> SHUTDOWN.
+ *
+ * An unexpected command tag or a failed send is fatal.
+ */
+static void next_command(PgSocket *bouncer, MBuf *pkt)
+{
+ bool res = true;
+ /* NOTE(review): cmd is used without a NULL check - confirm mbuf_get_string() cannot return NULL here */
+ const char *cmd = mbuf_get_string(pkt);
+
+ log_debug("takeover_recv_fds: 'C' body: %s", cmd);
+ if (strcmp(cmd, "SUSPEND") == 0) {
+ log_info("SUSPEND finished, sending SHOW FDS");
+ SEND_generic(res, bouncer, 'Q', "s", "SHOW FDS;");
+ } else if (strncmp(cmd, "SHOW", 4) == 0) {
+
+ log_info("SHOW FDS finished, sending SHUTDOWN");
+
+ /* all fds loaded, review them */
+ takeover_postprocess_fds();
+
+ /* all OK, kill old one */
+ SEND_generic(res, bouncer, 'Q', "s", "SHUTDOWN;");
+ } else
+ fatal("got bad CMD from old bouncer: %s", cmd);
+
+ /* res was set by SEND_generic above */
+ if (!res)
+ fatal("command send failed");
+}
+
+/*
+ * Walk all packets in one received message from the old bouncer.
+ *
+ * 'data' holds the packet bytes, 'msg' the ancillary data: every
+ * DataRow ('D') consumes the next control message, which carries the
+ * fd that the row describes.  CommandComplete ('C') triggers the next
+ * takeover step.  Anything malformed or unexpected is fatal.
+ */
+static void takeover_parse_data(PgSocket *bouncer,
+				struct msghdr *msg, MBuf *data)
+{
+	struct cmsghdr *cmsg;
+	unsigned pkt_type, pkt_len;
+	uint8 *pktptr;
+	MBuf pkt;
+
+	cmsg = msg->msg_controllen ? CMSG_FIRSTHDR(msg) : NULL;
+
+	while (mbuf_avail(data) > 0) {
+		if (!get_header(data, &pkt_type, &pkt_len))
+			fatal("cannot parse packet");
+
+		/*
+		 * pkt_len includes the 5 header bytes that get_header() has
+		 * already consumed.  Refuse packets whose body is not fully
+		 * inside the received data, instead of reading past it.
+		 */
+		if (pkt_len < 5 || pkt_len > mbuf_avail(data) + 5)
+			fatal("takeover_parse_data: partial packet");
+
+		pktptr = (uint8*)mbuf_get_bytes(data, pkt_len - 5);
+		mbuf_init(&pkt, pktptr, pkt_len - 5);
+
+		switch (pkt_type) {
+		case 'T': /* RowDescription */
+			log_debug("takeover_parse_data: 'T'");
+			break;
+		case 'D': /* DataRow */
+			log_debug("takeover_parse_data: 'D'");
+			if (cmsg) {
+				takeover_load_fd(&pkt, cmsg);
+				cmsg = CMSG_NXTHDR(msg, cmsg);
+			} else
+				fatal("got row without fd info");
+			break;
+		case 'Z': /* ReadyForQuery */
+			log_debug("takeover_parse_data: 'Z'");
+			break;
+		case 'C': /* CommandComplete */
+			log_debug("takeover_parse_data: 'C'");
+			next_command(bouncer, &pkt);
+			break;
+		case 'E': /* ErrorMessage */
+			log_server_error("old bouncer sent", &pkt);
+			fatal("something failed");
+			/* fatal() is not expected to return; break guards the fall-through anyway */
+			break;
+		default:
+			fatal("takeover_parse_data: unexpected pkt: '%c'", pkt_type);
+		}
+	}
+}
+
+/*
+ * Listen for data from old bouncer.
+ *
+ * Always uses recvmsg(), to keep code simpler: the same call delivers
+ * both the packet bytes and the ancillary data.
+ *
+ * EOF from the old bouncer finishes the takeover; EAGAIN is ignored;
+ * any other receive error is fatal.
+ */
+static void takeover_recv_cb(int sock, short flags, void *arg)
+{
+ PgSocket *bouncer = arg;
+ uint8 data_buf[2048];
+ uint8 cnt_buf[128];
+ struct msghdr msg;
+ struct iovec io;
+ int res;
+ MBuf data;
+
+ /* one iovec for packet data, cnt_buf for control messages */
+ memset(&msg, 0, sizeof(msg));
+ io.iov_base = data_buf;
+ io.iov_len = sizeof(data_buf);
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cnt_buf;
+ msg.msg_controllen = sizeof(cnt_buf);
+
+ /* NOTE(review): msg.msg_flags is not inspected, so truncated data or control messages go unnoticed */
+ res = safe_recvmsg(sock, &msg, 0);
+ if (res > 0) {
+ mbuf_init(&data, data_buf, res);
+ takeover_parse_data(bouncer, &msg, &data);
+ } else if (res == 0) {
+ /* old bouncer closed the connection */
+ takeover_finish(bouncer);
+ } else {
+ if (errno == EAGAIN)
+ return;
+ fatal_perror("safe_recvmsg");
+ }
+}
+
+/*
+ * Login finished, send first command,
+ * replace recv callback with custom recvmsg() based one.
+ *
+ * A failed send is fatal, as in next_command(): without SUSPEND
+ * reaching the old bouncer the takeover sequence can never progress.
+ */
+void takeover_login(PgSocket *bouncer)
+{
+	bool res;
+
+	slog_info(bouncer, "Login OK, sending SUSPEND");
+	SEND_generic(res, bouncer, 'Q', "s", "SUSPEND;");
+	if (!res)
+		fatal("takeover_login: failed to send command");
+
+	/* use own callback */
+	sbuf_pause(&bouncer->sbuf);
+	sbuf_continue_with_callback(&bouncer->sbuf, takeover_recv_cb);
+}
+
+/*
+ * Launch connection to running process.
+ *
+ * Uses the pool of the "pgbouncer" database with its forced user.
+ * A missing database or pool is fatal.
+ */
+void takeover_init(void)
+{
+	PgDatabase *db = find_database("pgbouncer");
+	PgPool *pool;
+
+	/* check before db->forced_user is dereferenced */
+	if (!db)
+		fatal("no admin database?");
+
+	pool = get_pool(db, db->forced_user);
+	if (!pool)
+		fatal("no admin pool?");
+
+	log_info("takeover_init: launching connection");
+	launch_new_connection(pool);
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* launch a connection to the already running bouncer process */
+void takeover_init(void);
+/* login to the old bouncer finished: send SUSPEND and switch to the recvmsg() based callback */
+void takeover_login(PgSocket *bouncer);
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Random small utility functions
+ */
+
+#include "bouncer.h"
+
+#include "md5.h"
+
+#ifdef HAVE_SYS_UCRED_H
+#include <sys/ucred.h>
+#endif
+
+/* malloc() len bytes and zero them; returns NULL if the allocation fails. */
+void *zmalloc(size_t len)
+{
+	void *mem = malloc(len);
+
+	if (mem == NULL)
+		return NULL;
+	memset(mem, 0, len);
+	return mem;
+}
+
+/*
+ * Safe string copy
+ */
+#ifndef HAVE_STRLCPY
+/*
+ * Copy src into dst, writing at most n bytes including the terminator.
+ * Returns strlen(src); a result >= n means the copy was truncated.
+ * With n == 0 nothing is written.
+ */
+size_t strlcpy(char *dst, const char *src, size_t n)
+{
+	size_t srclen = strlen(src);
+
+	if (n == 0)
+		return srclen;
+
+	if (srclen >= n) {
+		/* does not fit: keep the first n-1 bytes and terminate */
+		memcpy(dst, src, n - 1);
+		dst[n - 1] = 0;
+	} else {
+		/* fits: copy together with the terminator */
+		memcpy(dst, src, srclen + 1);
+	}
+	return srclen;
+}
+#endif
+#ifndef HAVE_STRLCAT
+/*
+ * Append src to the string in dst, where n is the full size of dst.
+ * Returns the length the combined string would have had; if dst holds no
+ * terminator within n bytes nothing is written and n + strlen(src) is
+ * returned.
+ */
+size_t strlcat(char *dst, const char *src, size_t n)
+{
+	size_t used;
+
+	for (used = 0; used < n; used++) {
+		if (dst[used] == 0)
+			return used + strlcpy(dst + used, src, n - used);
+	}
+	return used + strlen(src);
+}
+#endif
+
+/*
+ * Generic logging
+ */
+
+/* Write the current local time as "YYYY-MM-DD HH:MM:SS" into buf (max bytes). */
+static void render_time(char *buf, int max)
+{
+	struct timeval now;
+	struct tm parts;
+
+	gettimeofday(&now, NULL);
+	localtime_r(&now.tv_sec, &parts);
+	strftime(buf, max, "%Y-%m-%d %H:%M:%S", &parts);
+}
+
+/*
+ * Format one log line, "<time> <pid> <pfx> <msg>\n", then append it to
+ * cf_logfile (when set) and print it to stderr (when not a daemon).
+ */
+static void _log_write(const char *pfx, const char *msg)
+{
+	char buf[1024];
+	char tbuf[64];
+	int len;
+
+	render_time(tbuf, sizeof(tbuf));
+	len = snprintf(buf, sizeof(buf), "%s %u %s %s\n",
+		       tbuf, (unsigned)getpid(), pfx, msg);
+	if (len < 0)
+		return;
+	if (len >= (int)sizeof(buf)) {
+		/*
+		 * snprintf() reports the untruncated length; clamp it to what
+		 * is really in buf and keep the line newline-terminated.
+		 */
+		len = sizeof(buf) - 1;
+		buf[len - 1] = '\n';
+	}
+
+	if (cf_logfile) {
+		int fd = open(cf_logfile, O_CREAT | O_APPEND | O_WRONLY, 0644);
+		/* descriptor 0 is valid too, e.g. when stdin was closed */
+		if (fd >= 0) {
+			safe_write(fd, buf, len);
+			safe_close(fd);
+		}
+	}
+	if (!cf_daemon)
+		fprintf(stderr, "%s", buf);
+}
+
+/* Expand fmt/ap into a local buffer and hand the text to _log_write(). */
+static void _log(const char *pfx, const char *fmt, va_list ap)
+{
+	char text[1024];
+
+	vsnprintf(text, sizeof(text), fmt, ap);
+	_log_write(pfx, text);
+}
+
+/*
+ * Log a FATAL message tagged with its source location and terminate.
+ *
+ * file/line/func identify the call site; fmt and the variadic arguments
+ * form the message.  Calls abort() when cf_verbose > 2, otherwise exit(1).
+ */
+void _fatal(const char *file, int line, const char *func,
+	    const char *fmt, ...)
+{
+	va_list ap;
+	char msg[1024];
+	char buf[1024];
+
+	/* expand the caller's message first ... */
+	va_start(ap, fmt);
+	vsnprintf(msg, sizeof(msg), fmt, ap);
+	va_end(ap);
+
+	/*
+	 * ... then add the location as plain data, so that a '%' inside
+	 * file or func is never interpreted as a conversion.
+	 */
+	snprintf(buf, sizeof(buf),
+		 "@%s:%d in function %s(): %s",
+		 file, line, func, msg);
+	_log_write("FATAL", buf);
+
+	if (cf_verbose > 2)
+		abort();
+	exit(1);
+}
+
+/*
+ * Like _fatal(), but appends ": <strerror(errno)>" to the message.
+ * errno is sampled on entry so the formatting below cannot disturb it.
+ */
+void _fatal_perror(const char *file, int line, const char *func,
+		   const char *fmt, ...)
+{
+	int saved_errno = errno;
+	va_list ap;
+	char buf[1024];
+
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+	_fatal(file, line, func, "%s: %s", buf, strerror(saved_errno));
+}
+
+/*
+ * generic logging
+ */
+/*
+ * Log a printf-style message under the level string pfx.
+ *
+ * errno is restored before returning: writing the log line may perform
+ * I/O, and callers inspect errno after logging a failed call.
+ */
+void log_level(const char *pfx, const char *fmt, ...)
+{
+	int saved_errno = errno;
+	va_list ap;
+
+	va_start(ap, fmt);
+	_log(pfx, fmt, ap);
+	va_end(ap);
+
+	errno = saved_errno;
+}
+
+/*
+ * Logging about specific PgSocket
+ */
+
+/*
+ * Log a printf-style message about one socket.
+ *
+ * The text is prefixed with 'S' or 'C' (as decided by is_server_socket())
+ * and "db/user@host:port".  "(nodb)" / "(nouser)" stand in when the socket
+ * has no pool / no auth_user; unix sockets report "unix" as the host.
+ */
+void
+slog_level(const char *pfx, const PgSocket *sock, const char *fmt, ...)
+{
+	char text[1024];
+	char line[1024];
+	const char *db = "(nodb)";
+	const char *user = "(nouser)";
+	const char *host = "unix";
+	int port;
+	va_list ap;
+
+	if (sock->pool)
+		db = sock->pool->db->name;
+	if (sock->auth_user)
+		user = sock->auth_user->name;
+	if (!sock->addr.is_unix)
+		host = inet_ntoa(sock->addr.ip_addr);
+	port = sock->addr.port;
+
+	/* caller's message first, then wrap it with the socket description */
+	va_start(ap, fmt);
+	vsnprintf(text, sizeof(text), fmt, ap);
+	va_end(ap);
+
+	snprintf(line, sizeof(line), "%c: %s/%s@%s:%d %s",
+		 is_server_socket(sock) ? 'S' : 'C',
+		 db, user, host, port, text);
+
+	_log_write(pfx, line);
+}
+
+
+/*
+ * Wrappers for read/write/recv/send that survive interruptions.
+ */
+
+/* read() that is restarted whenever it fails with EINTR. */
+int safe_read(int fd, void *buf, int len)
+{
+	int got;
+
+	do {
+		got = read(fd, buf, len);
+	} while (got < 0 && errno == EINTR);
+	return got;
+}
+
+/* write() that is restarted whenever it fails with EINTR. */
+int safe_write(int fd, const void *buf, int len)
+{
+	int sent;
+
+	do {
+		sent = write(fd, buf, len);
+	} while (sent < 0 && errno == EINTR);
+	return sent;
+}
+
+/*
+ * recv() that is restarted on EINTR.
+ *
+ * Failures are logged at NOISE level, successes only when cf_verbose > 2.
+ * errno from recv() is preserved across the logging call so the caller
+ * can still examine it.
+ */
+int safe_recv(int fd, void *buf, int len, int flags)
+{
+	int res;
+	int saved_errno;
+
+	do {
+		res = recv(fd, buf, len, flags);
+	} while (res < 0 && errno == EINTR);
+
+	/* the logger may do I/O of its own and overwrite errno */
+	saved_errno = errno;
+	if (res < 0)
+		log_noise("safe_recv(%d, %d) = %s", fd, len, strerror(saved_errno));
+	else if (cf_verbose > 2)
+		log_noise("safe_recv(%d, %d) = %d", fd, len, res);
+	errno = saved_errno;
+	return res;
+}
+
+/*
+ * send() that is restarted on EINTR.
+ *
+ * Failures are logged at NOISE level, successes only when cf_verbose > 2.
+ * errno from send() is preserved across the logging call so the caller
+ * can still examine it.
+ */
+int safe_send(int fd, const void *buf, int len, int flags)
+{
+	int res;
+	int saved_errno;
+
+	do {
+		res = send(fd, buf, len, flags);
+	} while (res < 0 && errno == EINTR);
+
+	/* the logger may do I/O of its own and overwrite errno */
+	saved_errno = errno;
+	if (res < 0)
+		log_noise("safe_send(%d, %d) = %s", fd, len, strerror(saved_errno));
+	else if (cf_verbose > 2)
+		log_noise("safe_send(%d, %d) = %d", fd, len, res);
+	errno = saved_errno;
+	return res;
+}
+
+/*
+ * close() that is retried on EINTR.
+ *
+ * By the manpage close() could be interruptible, although it seems that
+ * at least on linux it cannot happen.
+ */
+int safe_close(int fd)
+{
+	int rc;
+
+	do {
+		rc = close(fd);
+	} while (rc < 0 && errno == EINTR);
+	return rc;
+}
+
+/*
+ * recvmsg() that is restarted on EINTR.
+ *
+ * Failures are logged as warnings, successes only when cf_verbose > 2.
+ * errno from recvmsg() is preserved across the logging call.
+ */
+int safe_recvmsg(int fd, struct msghdr *msg, int flags)
+{
+	int res;
+	int saved_errno;
+
+	do {
+		res = recvmsg(fd, msg, flags);
+	} while (res < 0 && errno == EINTR);
+
+	/* the logger may do I/O of its own and overwrite errno */
+	saved_errno = errno;
+	if (res < 0)
+		log_warning("safe_recvmsg(%d, msg, %d) = %s", fd, flags, strerror(saved_errno));
+	else if (cf_verbose > 2)
+		log_noise("safe_recvmsg(%d, msg, %d) = %d", fd, flags, res);
+	errno = saved_errno;
+	return res;
+}
+
+/*
+ * sendmsg() that is restarted on EINTR and, up to 20 times with a one
+ * second pause, on EMSGSIZE.
+ *
+ * Failures are logged as warnings, successes only when cf_verbose > 2.
+ * The errno of the final sendmsg() call is what the caller sees.
+ */
+int safe_sendmsg(int fd, const struct msghdr *msg, int flags)
+{
+	int res;
+	int saved_errno;
+	int msgerr_count = 0;
+loop:
+	res = sendmsg(fd, msg, flags);
+	if (res < 0 && errno == EINTR)
+		goto loop;
+
+	/* capture errno before logging, which may overwrite it */
+	saved_errno = errno;
+	if (res < 0) {
+		/* the length fields are not int; cast to match the %d conversions */
+		log_warning("safe_sendmsg(%d, msg[%d,%d], %d) = %s", fd,
+			    (int)msg->msg_iov[0].iov_len,
+			    (int)msg->msg_controllen,
+			    flags, strerror(saved_errno));
+
+		/* with ancillary data on blocking socket OSX returns
+		 * EMSGSIZE instead of blocking. try to solve it by waiting */
+		if (saved_errno == EMSGSIZE && msgerr_count < 20) {
+			struct timeval tv = {1, 0};
+			log_warning("trying to sleep a bit");
+			select(0, NULL, NULL, NULL, &tv);
+			msgerr_count++;
+			goto loop;
+		}
+	} else if (cf_verbose > 2)
+		log_noise("safe_sendmsg(%d, msg, %d) = %d", fd, flags, res);
+	errno = saved_errno;
+	return res;
+}
+
+/*
+ * Load a file into malloc()-ed C string.
+ */
+
+/*
+ * Read the whole file fn into a freshly malloc()-ed, NUL-terminated
+ * buffer.
+ *
+ * Returns the buffer (caller frees it) or NULL after logging an error.
+ * The file is opened first and sized with fstat() on the descriptor, so
+ * the size and the contents come from the same file.  The terminator is
+ * placed after the bytes actually read, and the descriptor is closed on
+ * every path.
+ */
+char *load_file(const char *fn)
+{
+	struct stat st;
+	char *buf = NULL;
+	size_t total = 0;
+	int res, fd;
+
+	fd = open(fn, O_RDONLY);
+	if (fd < 0) {
+		log_error("%s: %s", fn, strerror(errno));
+		return NULL;
+	}
+
+	res = fstat(fd, &st);
+	if (res < 0) {
+		log_error("%s: %s", fn, strerror(errno));
+		goto load_error;
+	}
+
+	buf = malloc(st.st_size + 1);
+	if (!buf) {
+		log_error("%s: %s", fn, "out of memory");
+		goto load_error;
+	}
+
+	/* read() may return less than asked for; loop until EOF */
+	while (total < (size_t)st.st_size) {
+		res = safe_read(fd, buf + total, st.st_size - total);
+		if (res < 0) {
+			log_error("%s: %s", fn, strerror(errno));
+			goto load_error;
+		}
+		if (res == 0)
+			break;
+		total += res;
+	}
+
+	safe_close(fd);
+	buf[total] = 0;
+
+	return buf;
+
+load_error:
+	safe_close(fd);
+	free(buf);
+	return NULL;
+}
+
+/*
+ * PostgreSQL MD5 "encryption".
+ */
+
+/*
+ * Write the MD5_DIGEST_LENGTH bytes of hash as lowercase hex into dst,
+ * followed by a NUL terminator.
+ */
+static void hash2hex(const uint8 *hash, char *dst)
+{
+	static const char digits[] = "0123456789abcdef";
+	const uint8 *end = hash + MD5_DIGEST_LENGTH;
+
+	while (hash < end) {
+		uint8 byte = *hash++;
+
+		dst[0] = digits[byte >> 4];
+		dst[1] = digits[byte & 15];
+		dst += 2;
+	}
+	*dst = 0;
+}
+
+/*
+ * Compute "md5" + hex(MD5(part1 || part2)) into dest.
+ *
+ * part1 is a C string, part2 is part2len raw bytes.  dest receives 3
+ * prefix characters, the hex digest written by hash2hex() and its NUL.
+ * Always returns true.
+ */
+bool pg_md5_encrypt(const char *part1,
+		    const char *part2, size_t part2len,
+		    char *dest)
+{
+	MD5_CTX ctx;
+	uint8 hash[MD5_DIGEST_LENGTH];
+
+	MD5_Init(&ctx);
+	MD5_Update(&ctx, part1, strlen(part1));
+	MD5_Update(&ctx, part2, part2len);
+	MD5_Final(hash, &ctx);
+
+	memcpy(dest, "md5", 3);
+	hash2hex(hash, dest + 3);
+
+	/* wipe the whole digest, not just its first byte */
+	memset(hash, 0, sizeof(hash));
+	return true;
+}
+
+/* Thin wrapper: returns whatever crypt(passwd, salt) returns. */
+const char *pg_crypt(const char *passwd, const char *salt)
+{
+	const char *hashed = crypt(passwd, salt);
+
+	return hashed;
+}
+
+/*
+ * Wrapper for getting random bytes: fills dest[0..len-1] from random().
+ *
+ * Always returns true; the previous "return len" reported failure for a
+ * zero-length request.
+ *
+ * NOTE(review): random() is not a cryptographic generator - confirm that
+ * this is acceptable for every caller.
+ */
+bool get_random_bytes(uint8 *dest, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		dest[i] = random() & 255;
+	return true;
+}
+
+/*
+ * high-precision time
+ */
+
+/* Current gettimeofday() time as tv_sec * USEC + tv_usec. */
+usec_t get_time_usec(void)
+{
+	struct timeval now;
+	usec_t stamp;
+
+	gettimeofday(&now, NULL);
+	stamp = (usec_t)now.tv_sec * USEC;
+	stamp += now.tv_usec;
+	return stamp;
+}
+
+/*
+ * cache time, as we dont need sub-second precision
+ */
+/* 0 means "nothing cached yet" */
+static usec_t time_cache = 0;
+
+/*
+ * Return the cached timestamp, sampling get_time_usec() first if the
+ * cache is empty.
+ */
+usec_t get_cached_time(void)
+{
+	if (time_cache != 0)
+		return time_cache;
+
+	time_cache = get_time_usec();
+	return time_cache;
+}
+
+/* Drop the cached timestamp so the next get_cached_time() samples the clock again. */
+void reset_time_cache(void)
+{
+ time_cache = 0;
+}
+
+/*
+ * Get the uid of the process on the other side of a unix socket.
+ *
+ * Uses SO_PEERCRED when available, otherwise LOCAL_PEERCRED.  Returns
+ * true and stores the uid in *uid_p on success; returns false after
+ * logging on error, or silently when neither mechanism is compiled in.
+ */
+bool get_unix_peer_uid(int fd, uid_t *uid_p)
+{
+	int res = -1;
+#if defined(SO_PEERCRED)
+	struct ucred cred;
+	socklen_t len = sizeof(cred);
+
+	res = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len);
+	if (res >= 0)
+		*uid_p = cred.uid;
+	else
+		log_error("getsockopt(SO_PEERCRED): %s", strerror(errno));
+#elif defined(LOCAL_PEERCRED)
+	struct xucred cred;
+	socklen_t len = sizeof(cred);
+
+	/*
+	 * LOCAL_PEERCRED is an option of the local-domain protocol and is
+	 * queried at level 0 (SOL_LOCAL); AF_UNIX is an address family,
+	 * not a socket-option level.
+	 */
+	res = getsockopt(fd, 0, LOCAL_PEERCRED, &cred, &len);
+	if (res >= 0)
+		*uid_p = cred.cr_uid;
+	else
+		log_error("getsockopt(LOCAL_PEERCRED): %s", strerror(errno));
+#endif
+	return (res >= 0);
+}
+
+/*
+ * Turn O_NONBLOCK on (val != 0) or off (val == 0) for fd, keeping the
+ * other file status flags.  Any fcntl() failure is fatal.
+ */
+void socket_set_nonblocking(int fd, int val)
+{
+	int cur, wanted;
+
+	cur = fcntl(fd, F_GETFL, 0);
+	if (cur < 0)
+		fatal_perror("fcntl(F_GETFL)");
+
+	wanted = val ? (cur | O_NONBLOCK) : (cur & ~O_NONBLOCK);
+
+	if (fcntl(fd, F_SETFL, wanted) < 0)
+		fatal_perror("fcntl(F_SETFL)");
+}
+
+/*
+ * Set needed socket options.
+ *
+ * For every socket: close-on-exec, non-blocking mode and, where the
+ * platform defines it, SO_NOSIGPIPE.  For non-unix sockets additionally:
+ * keepalive settings (when cf_tcp_keepalive is set), send/receive buffer
+ * sizes (when cf_tcp_socket_buffer is set) and TCP_NODELAY.
+ *
+ * Any failing fcntl()/setsockopt() call ends in fatal_perror().
+ */
+void tune_socket(int sock, bool is_unix)
+{
+ int res;
+ int val;
+
+ /* close fd on exec */
+ res = fcntl(sock, F_SETFD, FD_CLOEXEC);
+ if (res < 0)
+ fatal_perror("fcntl FD_CLOEXEC");
+
+ /* when no data avail, return EAGAIN instead blocking */
+ socket_set_nonblocking(sock, 1);
+
+#ifdef SO_NOSIGPIPE
+ /* disallow SIGPIPE, if possible */
+ val = 1;
+ res = setsockopt(sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt SO_NOSIGPIPE");
+#endif
+
+ /*
+ * Following options are for network sockets
+ */
+ if (is_unix)
+ return;
+
+ /* the keepalive stuff needs some poking before enabling */
+ if (cf_tcp_keepalive) {
+ /* turn on socket keepalive */
+ val = 1;
+ res = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt SO_KEEPALIVE");
+#ifdef __linux__
+ /* set count of keepalive packets */
+ if (cf_tcp_keepcnt > 0) {
+ val = cf_tcp_keepcnt;
+ res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt TCP_KEEPCNT");
+ }
+ /* how long the connection can stay idle before sending keepalive pkts */
+ if (cf_tcp_keepidle) {
+ val = cf_tcp_keepidle;
+ res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt TCP_KEEPIDLE");
+ }
+ /* time between packets */
+ if (cf_tcp_keepintvl) {
+ val = cf_tcp_keepintvl;
+ res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt TCP_KEEPINTVL");
+ }
+#else
+#ifdef TCP_KEEPALIVE
+ /* non-linux: cf_tcp_keepidle is applied through TCP_KEEPALIVE */
+ if (cf_tcp_keepidle) {
+ val = cf_tcp_keepidle;
+ res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt TCP_KEEPALIVE");
+ }
+#endif
+#endif
+ }
+
+ /* set in-kernel socket buffer size (same value for both directions) */
+ if (cf_tcp_socket_buffer) {
+ val = cf_tcp_socket_buffer;
+ res = setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt SO_SNDBUF");
+ val = cf_tcp_socket_buffer;
+ res = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt SO_RCVBUF");
+ }
+
+ /*
+ * Turn off kernel buffering, each send() will be one packet.
+ */
+ val = 1;
+ res = setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
+ if (res < 0)
+ fatal_perror("setsockopt TCP_NODELAY");
+}
+
+
+/*
+ * Check whether str is an item of liststr, a list whose items are
+ * separated by commas and/or whitespace.
+ *
+ * Every occurrence of str is examined, not only the first one, so an
+ * earlier item that merely contains str (e.g. "foobar, foo" vs "foo")
+ * does not hide a later exact match.  An empty str never matches.
+ */
+bool strlist_contains(const char *liststr, const char *str)
+{
+	size_t len = strlen(str);
+	const char *p;
+	int c;
+
+	if (len == 0)
+		return false;
+
+	for (p = strstr(liststr, str); p != NULL; p = strstr(p + 1, str)) {
+		/* item must start at list start or right after a separator */
+		if (p > liststr) {
+			/* isspace() needs an unsigned char value */
+			c = (unsigned char)p[-1];
+			if (!isspace(c) && c != ',')
+				continue;
+		}
+
+		/* item must end at list end or right before a separator */
+		c = (unsigned char)p[len];
+		if (c == 0 || isspace(c) || c == ',')
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Render uval (a usec_t timestamp) as local "YYYY-MM-DD HH:MM".
+ * The result lives in a static buffer that the next call overwrites.
+ */
+const char *format_date(usec_t uval)
+{
+	static char buf[128];
+	time_t secs = uval / USEC;
+	struct tm *parts = localtime(&secs);
+
+	strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M", parts);
+	return buf;
+}
+
--- /dev/null
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ *
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * time tools
+ */
+typedef uint64_t usec_t;
+usec_t get_cached_time(void);
+void reset_time_cache(void);
+usec_t get_time_usec(void);
+
+/*
+ * load file into malloced buffer
+ */
+char *load_file(const char *fn);
+
+void *zmalloc(size_t len);
+
+/*
+ * generic logging
+ *
+ * log_level() takes a level string and a printf-style format.  The
+ * log_error/log_warning/log_info macros always log; log_debug() logs only
+ * when cf_verbose > 0 and log_noise() only when cf_verbose > 1.  The
+ * macros use the GNU named-variadic form (args...).
+ */
+void log_level(const char *level, const char *s, ...);
+#define log_error(args...) log_level("ERROR", ## args)
+#define log_warning(args...) log_level("WARNING", ## args)
+#define log_info(args...) log_level("LOG", ## args)
+#define log_debug(args...) do { \
+ if (cf_verbose > 0) log_level("DEBUG", ## args); \
+ } while (0)
+#define log_noise(args...) do { \
+ if (cf_verbose > 1) log_level("NOISE", ## args); \
+ } while (0)
+
+
+/*
+ * logging about specific socket
+ *
+ * Same level scheme as the generic macros, with the socket as the
+ * first macro argument: slog_debug() needs cf_verbose > 0 and
+ * slog_noise() needs cf_verbose > 1.
+ */
+void slog_level(const char *level, const PgSocket *sock, const char *fmt, ...);
+#define slog_error(sk, args...) slog_level("ERROR", sk, ## args)
+#define slog_warning(sk, args...) slog_level("WARNING", sk, ## args)
+#define slog_info(sk, args...) slog_level("LOG", sk, ## args)
+#define slog_debug(sk, args...) do { \
+ if (cf_verbose > 0) slog_level("DEBUG", sk, ## args); \
+ } while (0)
+#define slog_noise(sk, args...) do { \
+ if (cf_verbose > 1) slog_level("NOISE", sk, ## args); \
+ } while (0)
+
+/*
+ * log and exit
+ *
+ * fatal() and fatal_perror() pass the call site (__FILE__, __LINE__,
+ * __FUNCTION__) to the underscore-prefixed functions automatically.
+ */
+void _fatal(const char *file, int line, const char *func, const char *s, ...);
+void _fatal_perror(const char *file, int line, const char *func, const char *s, ...);
+#define fatal(args...) \
+ _fatal(__FILE__, __LINE__, __FUNCTION__, ## args)
+#define fatal_perror(args...) \
+ _fatal_perror(__FILE__, __LINE__, __FUNCTION__, ## args)
+
+/*
+ * non-interruptible operations
+ */
+int safe_read(int fd, void *buf, int len);
+int safe_write(int fd, const void *buf, int len);
+int safe_recv(int fd, void *buf, int len, int flags);
+int safe_send(int fd, const void *buf, int len, int flags);
+int safe_close(int fd);
+int safe_recvmsg(int fd, struct msghdr *msg, int flags);
+int safe_sendmsg(int fd, const struct msghdr *msg, int flags);
+
+/*
+ * password tools
+ */
+/* length of "md5" plus 32 hex digits */
+#define MD5_PASSWD_LEN 35
+/*
+ * True when passwd looks like a stored MD5 password: "md5" prefix and
+ * exactly MD5_PASSWD_LEN characters.  strncmp() stops at the terminator,
+ * so strings shorter than 3 bytes are not read past their end.
+ */
+#define isMD5(passwd) (strncmp((passwd), "md5", 3) == 0 \
+ && strlen(passwd) == MD5_PASSWD_LEN)
+bool pg_md5_encrypt(const char *part1, const char *part2, size_t p2len, char *dest);
+const char *pg_crypt(const char *passwd, const char *salt);
+bool get_random_bytes(uint8 *dest, int len);
+
+/*
+ * safe string copy
+ */
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t n);
+#endif
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t n);
+#endif
+
+/*
+ * socket option handling
+ */
+bool get_unix_peer_uid(int fd, uid_t *uid_p);
+void socket_set_nonblocking(int fd, int val);
+void tune_socket(int sock, bool is_unix);
+
+bool strlist_contains(const char *liststr, const char *str);
+
+const char *format_date(usec_t uval);
+
--- /dev/null
+
+# pg_config is queried once, at parse time
+PGINC := -I$(shell pg_config --includedir)
+PGLIB := -L$(shell pg_config --libdir)
+
+# Flags are split by role so make's built-in "%: %.c" rule puts them in
+# the right place: search paths in CPPFLAGS/LDFLAGS, libraries in LDLIBS
+# (which the rule places after the source file on the link line).
+CFLAGS = -O2 -g -Wall
+CPPFLAGS = $(PGINC) -I$(HOME)/src/libevent -I../src
+LDFLAGS = $(PGLIB) -L$(HOME)/src/libevent/.libs
+LDLIBS = -lpq -levent
+
+.PHONY: all clean
+
+all: asynctest
+
+# compiled and linked by the built-in rule
+asynctest: asynctest.c
+
+clean:
+	rm -f asynctest
+
--- /dev/null
+/*
+ * Things to test:
+ * - Conn per query
+ * - show tx
+ * - long tx
+ * - variable-size query
+ */
+
+#include <sys/time.h>
+#include <sys/select.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <libpq-fe.h>
+#include <event.h>
+
+/* Print the failed expression with file, line and function, then exit(1). */
+#define Assert(e) do { if (!(e)) { \
+ printf("Assert(%s) failed: %s:%d in %s\n", \
+ #e, __FILE__, __LINE__, __FUNCTION__); \
+ exit(1); } } while (0)
+
+typedef enum { false=0, true=1 } bool;
+
+#include "list.h"
+
+/* One test connection; it sits on either idle_list or active_list. */
+typedef struct DbConn {
+ List head; /* list linkage */
+ const char *connstr; /* passed to PQconnectStart() */
+ struct event ev; /* libevent handle, re-armed by each callback */
+ //time_t connect_time;
+ //unsigned query_count;
+ PGconn *con; /* libpq connection, NULL while disconnected */
+ const char *query; /* SQL text sent by send_query() */
+} DbConn;
+
+static LIST(idle_list);
+static LIST(active_list);
+
+/*
+ * Allocate a zeroed DbConn that will connect with connstr.
+ * The list head is initialised but the item is not put on any list.
+ * Exits the program if memory cannot be allocated.
+ */
+static DbConn *new_db(const char *connstr)
+{
+	DbConn *db = calloc(1, sizeof(*db));
+
+	if (db == NULL) {
+		printf("new_db: out of memory\n");
+		exit(1);
+	}
+	list_init(&db->head);
+	db->connstr = connstr;
+	return db;
+}
+
+/* Move db from active_list to idle_list; asserts that it is currently active. */
+static void set_idle(DbConn *db)
+{
+ Assert(item_in_list(&db->head, &active_list));
+ list_del(&db->head);
+ list_append(&db->head, &idle_list);
+}
+
+/* Move db from idle_list to active_list; asserts that it is currently idle. */
+static void set_active(DbConn *db)
+{
+ Assert(item_in_list(&db->head, &idle_list));
+ list_del(&db->head);
+ list_append(&db->head, &active_list);
+}
+
+/**
+ * Some error happened: report it, drop the libpq connection if there is
+ * one, and put db back on the idle list.
+ */
+static void conn_error(DbConn *db, const char *desc)
+{
+	if (db->con == NULL) {
+		printf("random error\n");
+	} else {
+		printf("libpq error in %s: %s\n",
+		       desc, PQerrorMessage(db->con));
+		PQfinish(db->con);
+		db->con = NULL;
+	}
+	set_idle(db);
+}
+
+/**
+ * Connection has a resultset available, fetch it.
+ *
+ * Returns true if there may be more results coming, false if all done.
+ * "Done" means either PQgetResult() returned NULL (the connection is
+ * then closed and db set idle) or an unexpected result status was seen
+ * (handled through conn_error()).
+ */
+static bool another_result(DbConn *db)
+{
+	PGresult *res = PQgetResult(db->con);
+	ExecStatusType status;
+
+	if (res == NULL) {
+		/* query finished: go idle and close the connection */
+		set_idle(db);
+		PQfinish(db->con);
+		db->con = NULL;
+		return false;
+	}
+
+	status = PQresultStatus(res);
+	PQclear(res);
+
+	/* todo: check result for PGRES_TUPLES_OK */
+	if (status == PGRES_TUPLES_OK || status == PGRES_COMMAND_OK)
+		return true;
+
+	conn_error(db, "weird result");
+	return false;
+}
+
+/**
+ * Event callback: the connection's socket became readable.
+ *
+ * Feeds the input to libpq, then collects results until either libpq
+ * needs more data (the read event is re-armed) or another_result()
+ * reports that nothing more is coming.
+ */
+static void result_cb(int sock, short flags, void *arg)
+{
+	DbConn *db = arg;
+
+	if (PQconsumeInput(db->con) == 0) {
+		conn_error(db, "PQconsumeInput");
+		return;
+	}
+
+	/* take results for as long as libpq has complete ones */
+	while (!PQisBusy(db->con)) {
+		if (!another_result(db))
+			return;
+	}
+
+	/* PQisBusy: incomplete result, wait for more input */
+	event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db);
+	event_add(&db->ev, NULL);
+}
+
+/*
+ * Event callback: the socket became writable while a query was still
+ * being flushed.  Keeps flushing; once PQflush() returns 0 it switches
+ * to waiting for the result.
+ */
+static void send_cb(int sock, short flags, void *arg)
+{
+	DbConn *db = arg;
+	int rc = PQflush(db->con);
+
+	if (rc < 0) {
+		conn_error(db, "PQflush");
+		return;
+	}
+
+	if (rc == 0)
+		event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db);
+	else
+		event_set(&db->ev, PQsocket(db->con), EV_WRITE, send_cb, db);
+	event_add(&db->ev, NULL);
+}
+
+/**
+ * Send db->query over the server connection.
+ *
+ * The query is queued with PQsendQueryParams() (no parameters, text
+ * results) and flushed; the event is then armed for writing if data is
+ * still pending, or for reading the result otherwise.
+ */
+static void send_query(DbConn *db)
+{
+	int rc;
+
+	rc = PQsendQueryParams(db->con, db->query,
+			       0,	/* nParams */
+			       NULL,	/* paramTypes */
+			       NULL,	/* paramValues */
+			       NULL,	/* paramLengths */
+			       NULL,	/* paramFormats */
+			       0);	/* resultformat, 0-text, 1-bin */
+	if (rc == 0) {
+		conn_error(db, "PQsendQueryParams");
+		return;
+	}
+
+	/* flush it down */
+	rc = PQflush(db->con);
+	if (rc < 0) {
+		conn_error(db, "PQflush");
+		return;
+	}
+
+	if (rc == 0)
+		event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db);
+	else
+		event_set(&db->ev, PQsocket(db->con), EV_WRITE, send_cb, db);
+	event_add(&db->ev, NULL);
+}
+
+/*
+ * Event callback driving the asynchronous connect.
+ *
+ * Polls libpq and either waits for the socket direction it asks for,
+ * sends the query once the connection is up, or reports an error.
+ */
+static void connect_cb(int sock, short flags, void *arg)
+{
+	DbConn *db = arg;
+	short wait_for = 0;
+
+	switch (PQconnectPoll(db->con)) {
+	case PGRES_POLLING_OK:
+		send_query(db);
+		return;
+	case PGRES_POLLING_WRITING:
+		wait_for = EV_WRITE;
+		break;
+	case PGRES_POLLING_READING:
+		wait_for = EV_READ;
+		break;
+	case PGRES_POLLING_ACTIVE:
+	case PGRES_POLLING_FAILED:
+		conn_error(db, "PQconnectPoll");
+		return;
+	default:
+		return;
+	}
+
+	event_set(&db->ev, PQsocket(db->con), wait_for, connect_cb, db);
+	event_add(&db->ev, NULL);
+}
+
+/*
+ * Start a non-blocking connection for db and wait for its socket to
+ * become writable; connect_cb() continues from there.
+ */
+static void launch_connect(DbConn *db)
+{
+	const char *failure = NULL;
+
+	db->con = PQconnectStart(db->connstr);
+	if (db->con == NULL)
+		failure = "PQconnectStart: no mem";
+	else if (PQstatus(db->con) == CONNECTION_BAD)
+		failure = "PQconnectStart";
+
+	if (failure != NULL) {
+		conn_error(db, failure);
+		return;
+	}
+
+	event_set(&db->ev, PQsocket(db->con), EV_WRITE, connect_cb, db);
+	event_add(&db->ev, NULL);
+}
+
+/*
+ * Put an idle DbConn back to work: reuse its connection if it still has
+ * one, otherwise start connecting.
+ */
+static void handle_idle(DbConn *db)
+{
+	set_active(db);
+
+	if (db->con == NULL) {
+		launch_connect(db);
+		return;
+	}
+	send_query(db);
+}
+
+/*
+ * Create 10 connections on the idle list, then loop forever: run one
+ * pass of the event loop and restart every connection that is idle.
+ */
+int main(void)
+{
+ int i;
+ DbConn *db;
+ List *item, *tmp;
+
+ /* all connections use the same hard-coded connect string and query */
+ for (i = 0; i < 10; i++) {
+ db = new_db("dbname=marko port=6000 host=/tmp");
+ db->query = "select 1";
+ list_append(&db->head, &idle_list);
+ }
+
+ event_init();
+
+ while (1) {
+ event_loop(EVLOOP_ONCE);
+ /* handle_idle() unlinks the item, hence the _safe iterator */
+ list_for_each_safe(item, &idle_list, tmp) {
+ db = container_of(item, DbConn, head);
+ handle_idle(db);
+ }
+ }
+ /* not reached: the loop above never exits */
+ return 0;
+}
+
+
--- /dev/null
+#! /usr/bin/env python
+
+import sys, os, re, time, psycopg
+import threading, thread, random
+
+# number of WorkThread instances started by main()
+n_thread = 100
+# when true, connections are not put into autocommit and do_work() commits
+longtx = 0
+# upper bound of the random extra sleep after each query;
+# the second assignment wins, so the effective value is 8
+tx_sleep = 0
+tx_sleep = 8
+
+# connect parameters, joined into "key=value ..." by get_connstr()
+conn_data = {
+ 'dbname': 'marko',
+ #'host': '127.0.0.1',
+ 'host': '/tmp',
+ 'port': '6000',
+ 'user': 'marko',
+ #'password': '',
+ 'connect_timeout': '5',
+}
+
+def get_connstr():
+    """Return conn_data rendered as a space-separated "key=value" string."""
+    return " ".join([key + '=' + val for key, val in conn_data.items()])
+
+class WorkThread(threading.Thread):
+    """Daemon thread that keeps connecting and running short queries.
+
+    Completed queries are counted in query_cnt, guarded by stat_lock.
+    """
+
+    def __init__(self):
+        threading.Thread.__init__(self)
+        self.setDaemon(True)
+        self.stat_lock = threading.Lock()
+        self.query_cnt = 0
+
+    def inc_cnt(self):
+        """Count one more finished query."""
+        self.stat_lock.acquire()
+        try:
+            self.query_cnt += 1
+        finally:
+            self.stat_lock.release()
+
+    def fetch_cnt(self):
+        """Return the query count and reset it to zero."""
+        self.stat_lock.acquire()
+        try:
+            val, self.query_cnt = self.query_cnt, 0
+        finally:
+            self.stat_lock.release()
+        return val
+
+    def run(self):
+        """Thread body: random start delay, then main_loop() forever."""
+        try:
+            time.sleep(random.random() * 10.0)
+        except:
+            pass
+        while 1:
+            try:
+                self.main_loop()
+            except (KeyboardInterrupt, SystemExit):
+                break
+            except Exception, d:
+                # report the problem, pause, then try again
+                print d
+                try:
+                    time.sleep(5)
+                except:
+                    pass
+
+    def main_loop(self):
+        """Open one connection and run 10 units of work on it."""
+        db = psycopg.connect(get_connstr())
+        if not longtx:
+            db.autocommit(1)
+        for _ in range(10):
+            self.do_work(db)
+            self.inc_cnt()
+
+    def do_work(self, db):
+        """Run one pg_sleep() query, then sleep on the client side."""
+        curs = db.cursor()
+        query = "select pg_sleep(%.02f)" % (random.random() * 1)
+        curs.execute(query)
+        time.sleep(tx_sleep * random.random() + 1)
+        if longtx:
+            db.commit()
+
+def main():
+    """Start n_thread workers and print the query rate about every 5 seconds."""
+    print "connstr", get_connstr()
+
+    thread_list = []
+    for _ in range(n_thread):
+        worker = WorkThread()
+        worker.start()
+        thread_list.append(worker)
+
+    print "started %d threads" % len(thread_list)
+
+    last = time.time()
+    while 1:
+        time.sleep(1)
+        now = time.time()
+        dur = now - last
+        if dur < 5:
+            continue
+        last = now
+        # collect and reset the per-thread counters
+        cnt = sum([t.fetch_cnt() for t in thread_list])
+        avg = cnt / dur
+        print "avg", avg
+
+# script entry point: run main() and exit quietly on SystemExit or Ctrl-C
+if __name__ == '__main__':
+ try:
+ main()
+ except SystemExit:
+ pass
+ except KeyboardInterrupt:
+ pass
+ #except Exception, d:
+ # print d
+
--- /dev/null
+;; database name = connect string
+[databases]
+
+p0 = port=6666 host=127.0.0.1 dbname=p0 user=bouncer pool_size=2
+p1 = port=6666 host=127.0.0.1 dbname=p1 user=bouncer
+p2 = port=6668 host=127.0.0.1 dbname=p2 user=bouncer
+
+;; Configuration section
+[pgbouncer]
+
+;;;
+;;; Administrative settings
+;;;
+
+logfile = test.log
+pidfile = test.pid
+
+;;;
+;;; Where to wait for clients
+;;;
+
+; ip address or * which means all ip-s
+listen_addr = 127.0.0.1
+listen_port = 6667
+unix_socket_dir = /tmp
+
+;;;
+;;; Authentication settings
+;;;
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+#auth_file = 8.0/main/global/pg_auth
+auth_file = userlist.txt
+
+;;;
+;;; Pooler personality questions
+;;;
+
+; When server connection is released back to pool:
+; session - after client disconnects
+; transaction - after transaction finishes
+; statement - after statement finishes
+pool_mode = statement
+
+; When taking idle server into use, this query is ran first.
+;
+; Query for session pooling:
+; ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT
+; Query for statement/transaction pooling:
+; SELECT 1
+; Empty query disables the functionality
+server_check_query = select 1
+
+; If server was used more recently than this many seconds ago,
+; skip the check query. If 0, the check query is always ran.
+server_check_delay = 10
+
+;;;
+;;; Connection limits
+;;;
+
+; total number of clients that can connect
+max_client_conn = 10
+default_pool_size = 5
+
+;;;
+;;; Timeouts
+;;;
+
+; Close server connection if its been connected longer.
+server_lifetime = 120
+
+; Close server connection if its not been used in this time.
+; Allows cleaning unnecessary connections from the pool after a peak.
+server_idle_timeout = 60
+
+; Cancel connection attempt if server takes longer than this to answer.
+server_connect_timeout = 15
+
+; If server login failed (server_connect_timeout or auth failure)
+; then wait this many seconds.
+server_login_retry = 15
+
+; Dangerous. Server connection is closed if query does not return
+; in this time. Should be used to survive network problems,
+; _not_ as statement_timeout. (default: 0)
+query_timeout = 20
+
+; Dangerous. Client connection is closed if no activity in this time.
+; Should be used to survive network problems. (default: 0)
+client_idle_timeout = 0
+
+
+;;;
+;;; Low-level tuning options
+;;;
+
+; buffer for streaming packets
+pkt_buf = 2048
+
+;;;
+;;; networking options, for info: man 7 tcp
+;;;
+
+; linux: notify program about new connection only if there
+; is also data received. (Seconds to wait.)
+tcp_defer_accept = 0
+
+;; following options are reloadable, but apply only to
+;; new connections.
+
+; in-kernel buffer size (linux default: 4096)
+tcp_socket_buffer = 0
+
+; whether tcp keepalive should be turned on (0/1)
+tcp_keepalive = 0
+
+;; following options are linux-specific.
+;; they also require tcp_keepalive=1
+
+; count of keepalive packets
+tcp_keepcnt = 0
+
+; how long the connection can be idle,
+; before sending keepalive packets
+tcp_keepidle = 0
+
+; The time between individual keepalive probes.
+tcp_keepintvl = 0
+
+; By default, max tcp packet cannot be larger than pkt_buf.
+; If this is set, then bouncer tells to kernel to queue packets.
+; Then max pkt length is tcp_socket_buffer.
+tcp_buffer_more = 0
+
--- /dev/null
+#!/bin/sh
+
+# Notes:
+# - uses iptables and -F with some tests, probably not very friendly to your firewall
+# - uses nc (netcat) with some tests, skips if not in path
+# - assumes postgres 8.2; fix your PATH so that its binaries come first
+
+export PATH=/usr/lib/postgresql/8.2/bin:$PATH
+export PGDATA=$PWD/pgdata
+export PGHOST=localhost
+export PGPORT=6667
+export EF_ALLOW_MALLOC_0=1
+
+BOUNCER_LOG=test.log
+BOUNCER_INI=test.ini
+BOUNCER_PID=test.pid
+BOUNCER_PORT=`sed -n '/^listen_port/s/listen_port.*=[^0-9]*//p' $BOUNCER_INI`
+BOUNCER_EXE=./pgbouncer
+
+LOGDIR=log
+NC_PORT=6668
+PG_PORT=6666
+PG_LOG=$LOGDIR/pg.log
+
+# Run pg_ctl against the test cluster ($PGDATA, port $PG_PORT) with the
+# given arguments; all output is appended to $PG_LOG.
+pgctl() {
+	# "$@" keeps each argument intact instead of re-splitting it
+	pg_ctl -o "-p $PG_PORT" -D "$PGDATA" "$@" >>"$PG_LOG" 2>&1
+}
+
+mkdir -p $LOGDIR
+rm -f $BOUNCER_LOG $PG_LOG
+# rm -r $PGDATA
+
+if [ ! -d $PGDATA ]; then
+ mkdir $PGDATA
+ initdb >/dev/null 2>&1
+fi
+
+pgctl start
+sleep 5
+
+psql -p $PG_PORT -l |grep p0 > /dev/null || {
+ psql -p $PG_PORT -c "create user bouncer" template1
+ createdb -p $PG_PORT p0
+ createdb -p $PG_PORT p1
+}
+
+$BOUNCER_EXE -d $BOUNCER_INI
+sleep 1
+
+#
+# fw hacks
+#
+
+# Silently drop outgoing tcp traffic to port $1 (iptables on Linux, ipfw
+# on Darwin).  Returns non-zero on an unknown OS so that the caller can
+# tell that no rule was installed.
+fw_drop_port() {
+	case `uname` in
+	Linux)
+		sudo iptables -A OUTPUT -p tcp --dport "$1" -j DROP;;
+	Darwin)
+		sudo ipfw add 100 drop tcp from any to 127.0.0.1 dst-port "$1";;
+	*)
+		echo "Unknown OS"
+		return 1;;
+	esac
+}
+# Reject outgoing tcp traffic to port $1 with a tcp reset (iptables on
+# Linux, ipfw on Darwin).  Returns non-zero on an unknown OS so that the
+# caller can tell that no rule was installed.
+fw_reject_port() {
+	case `uname` in
+	Linux)
+		sudo iptables -A OUTPUT -p tcp --dport "$1" -j REJECT --reject-with tcp-reset;;
+	Darwin)
+		sudo ipfw add 100 reset tcp from any to 127.0.0.1 dst-port "$1";;
+	*)
+		echo "Unknown OS"
+		return 1;;
+	esac
+}
+
+# Remove the firewall rules again: on Linux this flushes ALL iptables
+# rules (-F), on Darwin it deletes ipfw rule 100.  Exits the whole script
+# on an unknown OS.
+fw_reset() {
+ case `uname` in
+ Linux)
+ sudo iptables -F;;
+ Darwin)
+ sudo ipfw del 100;;
+ *)
+ echo "Unknown OS"; exit 1;;
+ esac
+}
+
+#
+# util functions
+#
+
+# Shut everything down: signal the bouncer named in the pid file (if the
+# file exists), stop postgres and remove the pid file.
+complete() {
+	if test -f $BOUNCER_PID; then
+		kill `cat $BOUNCER_PID` >/dev/null 2>&1
+	fi
+	pgctl -m fast stop
+	rm -f $BOUNCER_PID
+}
+
+# Print the message, clean up with complete() and abort the script.
+die() {
+	# quoted so the message is printed as given (no globbing/re-splitting)
+	echo "$@"
+	complete
+	exit 1
+}
+
+# Run one command on the bouncer admin console (database "pgbouncer",
+# user "pgbouncer", unix socket in /tmp); dies if psql fails.
+admin() {
+	# "$*" joins all arguments into the single string that -c expects
+	psql -h /tmp -U pgbouncer pgbouncer -c "$*;" || die "Cannot contact bouncer!"
+}
+
+# Run the test function named $1 with its output in $LOGDIR/$1.log and
+# print SUCCESS/FAILED according to its exit status.
+runtest() {
+ echo -n "`date` running $1 ... "
+ eval $1 >$LOGDIR/$1.log 2>&1
+ if [ $? -eq 0 ]; then
+ echo "SUCCESS"
+ else
+ echo "FAILED"
+ fi
+ date >> $LOGDIR/$1.log
+
+ # allow background processing to complete
+ wait
+ # start with fresh config
+ kill -HUP `cat $BOUNCER_PID`
+}
+
+# server_lifetime
+# Sets server_lifetime=2, runs a query through the bouncer, waits 3
+# seconds and then counts the 'bouncer' backends on p0 by asking postgres
+# directly.  The count is the exit status, so 0 remaining connections
+# means success.
+test_server_lifetime() {
+ admin "set server_lifetime=2"
+ psql -c "select now()" p0
+ sleep 3
+
+ rc=`psql -p $PG_PORT -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='p0'" p0`
+ psql -c "select now()" p0
+ return $rc
+}
+
+# server_idle_timeout
+# Same procedure as test_server_lifetime, but with server_idle_timeout=2:
+# the number of 'bouncer' backends still on p0 after 3 seconds is the
+# exit status (0 = success).
+test_server_idle_timeout() {
+ admin "set server_idle_timeout=2"
+ psql -c "select now()" p0
+ sleep 3
+ rc=`psql -p $PG_PORT -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='p0'" p0`
+ psql -c "select now()" p0
+ return $rc
+}
+
+# query_timeout
+# With query_timeout=3 a 5 second pg_sleep() must fail; the test fails
+# if psql succeeds.
+test_query_timeout() {
+ admin "set query_timeout=3"
+ psql -c "select pg_sleep(5)" p0 && return 1
+ return 0
+}
+
+# client_idle_timeout
+# With client_idle_timeout=2 the psql session sits idle for 3 seconds
+# between two queries.  ON_ERROR_STOP makes psql exit non-zero on an
+# error, and the test fails if psql exits with 0.
+test_client_idle_timeout() {
+ admin "set client_idle_timeout=2"
+ psql --set ON_ERROR_STOP=1 p0 <<-PSQL_EOF
+ select now();
+ \! sleep 3
+ select now();
+ PSQL_EOF
+ test $? -eq 0 && return 1
+ return 0
+}
+
+# server_login_retry
+# Stops postgres in the background, starts it again 3 seconds later and
+# meanwhile issues a query through the bouncer (server_login_retry=1,
+# query_timeout=10).  The query's exit status is the test result.
+test_server_login_retry() {
+ admin "set query_timeout=10"
+ admin "set server_login_retry=1"
+
+ (pgctl -m fast stop; sleep 3; pgctl start) &
+ sleep 1
+ psql -c "select now()" p0
+ rc=$?
+ wait
+ return $rc
+}
+
+# server_connect_timeout - uses netcat to start dummy server
+# nc listens on $NC_PORT (where p2 points) but never answers, so the
+# bouncer's connect attempt has to time out.  The test result is whether
+# the timeout message shows up in the bouncer log.
+test_server_connect_timeout_establish() {
+	which nc >/dev/null || return 1
+
+	nc -l -p $NC_PORT >/dev/null &
+	admin "set query_timeout=3"
+	admin "set server_connect_timeout=2"
+	psql -c "select now()" p2
+	# client will always see query_timeout, need to grep for connect timeout
+	grep "closing because: connect timeout" $BOUNCER_LOG
+	# remember grep's verdict; killall below would overwrite $?
+	rc=$?
+	# didnt seem to die otherwise
+	killall nc
+	return $rc
+}
+
+# server_connect_timeout - block with iptables
+# XXX: for some reason bouncer says 'connect failed' not 'connect timeout'
+# Needs CAN_SUDO to be set; fails immediately otherwise.
+test_server_connect_timeout_reject() {
+	# quoted: a value with spaces would break an unquoted "test -z"
+	test -z "$CAN_SUDO" && return 1
+	admin "set query_timeout=5"
+	admin "set server_connect_timeout=3"
+	fw_drop_port $PG_PORT
+	psql -c "select now()" p0
+	fw_reset
+	# client will always see query_timeout, need to grep for connect timeout
+	grep "closing because: connect failed" $BOUNCER_LOG
+}
+
+# server_check_delay
+# Runs a query, rejects traffic to postgres, waits past
+# server_check_delay and starts another query in the background; the
+# firewall is then reset and the query must still return "1".
+# Needs CAN_SUDO to be set; fails immediately otherwise.
+test_server_check_delay() {
+	# quoted: a value with spaces would break an unquoted "test -z"
+	test -z "$CAN_SUDO" && return 1
+
+	admin "set server_check_delay=2"
+	admin "set server_login_retry=3"
+	admin "set query_timeout=10"
+
+	psql p0 -c "select now()"
+	fw_reject_port $PG_PORT
+	sleep 3
+	psql -tAq p0 -c "select 1" >$LOGDIR/test.tmp &
+	sleep 1
+	fw_reset
+	echo `date` rules flushed
+	wait
+	echo `date` done waiting
+
+	test "`cat $LOGDIR/test.tmp`" = "1"
+}
+
+# max_client_conn
+# With max_client_conn=5: four sleeping clients plus one more must be
+# accepted; after a fifth sleeping client is started a further
+# connection must be refused; once all clients are done connecting must
+# work again.
+test_max_client_conn() {
+ admin "set max_client_conn=5"
+ admin "show config"
+
+ for i in `seq 1 4`; do
+ psql p1 -c "select now() as sleeping from pg_sleep(3);" &
+ done
+
+ # last conn allowed
+ psql p1 -c "select now() as last_conn" || return 1
+
+ # exhaust it
+ psql p1 -c "select now() as sleeping from pg_sleep(3);" &
+ sleep 1
+
+ # shouldn't be allowed
+ psql p1 -c "select now() as exhausted" && return 1
+
+ # should be ok
+ echo 'waiting for clients to complete ...'
+ wait
+ psql p1 -c "select now() as ok" || return 1
+
+ return 0
+}
+
+# - max pool size
+#
+# Runs 10 concurrent clients against a database and then counts the
+# backend connections that user 'bouncer' holds to it.  Expects 2 for p0
+# and 5 for p1 (presumably the pool sizes set in the test config -
+# confirm there).  Returns 0 when both counts match, 1 otherwise.
+test_pool_size() {
+
+	# docount DB - print the number of 'bouncer' backends connected to DB
+	# after 10 parallel clients have run against it.
+	# Callers capture stdout, so nothing but the count may be written
+	# there; the background clients are therefore sent to /dev/null.
+	docount() {
+		for i in `seq 10`; do
+			psql $1 -c "select pg_sleep(0.5)" >/dev/null &
+		done
+		wait
+		cnt=`psql -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='$1'" $1`
+		echo $cnt
+	}
+
+	# -eq ... || return 1 fails closed: an empty or non-numeric count makes
+	# test exit with an error status, which is treated as a failure too
+	test "`docount p0`" -eq 2 || return 1
+	test "`docount p1`" -eq 5 || return 1
+
+	return 0
+}
+
+# test online restart while clients running
+test_online_restart() {
+ for i in `seq 1 5`; do
+ for j in `seq 1 10`; do
+ psql -c "select now() as sleeping from pg_sleep(0.2)" p0 &
+ done
+
+ pid1=`cat $BOUNCER_PID`
+ echo "old bouncer is $pid1"
+ $BOUNCER_EXE -d -R $BOUNCER_INI
+ sleep 2
+ pid2=`cat $BOUNCER_PID`
+ echo "new bouncer is $pid2"
+ [ $pid1 = $pid2 ] && return 1
+ done
+ return 0
+}
+
+# test pause/resume
+#
+# A background loop runs 50 short queries one after another, appending
+# each result line to $LOGDIR/test.tmp, while the foreground issues five
+# pause/resume cycles.  Passes when all 50 result lines are present.
+test_pause_resume() {
+	rm -f $LOGDIR/test.tmp
+
+	# client side: 50 sequential queries, collected in test.tmp
+	for i in $(seq 1 50); do
+		psql -tAq p0 -c 'select 1 from pg_sleep(0.1)' >>$LOGDIR/test.tmp
+	done &
+
+	# admin side: pause for a second, resume for a second, five times
+	for i in $(seq 1 5); do
+		admin "pause";  sleep 1
+		admin "resume"; sleep 1
+	done
+
+	wait
+	[ $(wc -l <$LOGDIR/test.tmp) -eq 50 ]
+}
+
+# test suspend/resume
+#
+# A background loop runs 50 short queries one after another, appending
+# each result line to $LOGDIR/test.tmp, while the foreground issues five
+# suspend/resume cycles on the admin console.  Passes when all 50 result
+# lines are present in the file afterwards.
+test_suspend_resume() {
+ rm -f $LOGDIR/test.tmp
+ # client side: 50 sequential queries, collected in test.tmp
+ for i in `seq 1 50`; do
+ psql -tAq p0 -c 'select 1 from pg_sleep(0.1)' >>$LOGDIR/test.tmp
+ done &
+
+ # admin side: each cycle sends suspend and resume through one psql
+ # session fed by the here-document; "\!" is psql's shell escape and
+ # provides the one-second pauses between the commands
+ for i in `seq 1 5`; do
+ psql -h /tmp -p $BOUNCER_PORT pgbouncer -U pgbouncer <<-PSQL_EOF
+ suspend;
+ \! sleep 1
+ resume;
+ \! sleep 1
+ PSQL_EOF
+ done
+
+ # let the background query loop finish before counting its output
+ wait
+ test `wc -l <$LOGDIR/test.tmp` -eq 50
+}
+
+# test pool database restart
+test_database_restart() {
+ admin "set server_login_retry=1"
+
+ psql p0 -c "select now() as p0_before_restart"
+ pgctl -m fast restart
+ echo `date` restart 1
+ psql p0 -c "select now() as p0_after_restart" || return 1
+
+
+ # do with some more clients
+ for i in `seq 1 5`; do
+ psql p0 -c "select pg_sleep($i)" &
+ psql p1 -c "select pg_sleep($i)" &
+ done
+
+ pgctl -m fast restart
+ echo `date` restart 2
+
+ wait
+ psql p0 -c "select now() as p0_after_restart" || return 1
+}
+
+# test connect string change
+#
+# Rewrites the p1 entry of test.ini so that it uses dbname=p0, signals
+# the bouncer with SIGHUP and checks which database a new p1 connection
+# ends up in.  Passes when p1 reported "p1" before the change and "p0"
+# after it.  test.ini is restored on disk before returning; no second
+# SIGHUP is sent from here.
+test_database_change() {
+	admin "set server_lifetime=2"
+
+	db1=`psql -tAq p1 -c "select current_database()"`
+
+	# swap the dbname of the p1 entry, keeping a backup of the original
+	cp test.ini test.ini.bak
+	sed 's/\(p1 = port=6666 host=127.0.0.1 dbname=\)\(p1\)/\1p0/g' test.ini >test2.ini
+	mv test2.ini test.ini
+
+	kill -HUP `cat $BOUNCER_PID`
+
+	# wait longer than the server_lifetime set above before reconnecting
+	sleep 3
+	db2=`psql -tAq p1 -c "select current_database()"`
+
+	echo "db1=$db1 db2=$db2"
+	cp test.ini.bak test.ini
+	rm test.ini.bak
+
+	admin "show databases"
+	admin "show pools"
+
+	# quoted operands and two separate tests: an empty result from psql
+	# gives a plain mismatch instead of a malformed test expression
+	[ "$db1" = "p1" ] && [ "$db2" = "p0" ]
+}
+
+# Probe for sudo once up front; tests that need it check CAN_SUDO
+# and bail out when it is empty.
+echo "Testing for sudo access."
+if sudo true; then
+	CAN_SUDO=1
+fi
+
+# Default set of tests, run in this order.
+testlist="
+test_server_login_retry
+test_client_idle_timeout
+test_server_lifetime
+test_server_idle_timeout
+test_query_timeout
+test_server_connect_timeout_establish
+test_server_connect_timeout_reject
+test_server_check_delay
+test_max_client_conn
+test_pool_size
+test_online_restart
+test_pause_resume
+test_suspend_resume
+test_database_restart
+test_database_change
+"
+
+# Test names given on the command line replace the default list.
+case $# in
+0) ;;
+*) testlist=$@ ;;
+esac
+
+for test in $testlist; do
+	runtest $test
+done
+
+complete
+
--- /dev/null
+"marko" "asdasd"
+"postgres" "asdasd"
+"pgbouncer" "fake"