]> granicus.if.org Git - pgbouncer/commitdiff
Initial revision
authorMarko Kreen <markokr@gmail.com>
Tue, 13 Mar 2007 15:31:43 +0000 (15:31 +0000)
committerMarko Kreen <markokr@gmail.com>
Tue, 13 Mar 2007 15:31:43 +0000 (15:31 +0000)
57 files changed:
AUTHORS [new file with mode: 0644]
COPYRIGHT [new file with mode: 0644]
Makefile [new file with mode: 0644]
README [new file with mode: 0644]
config.mak.in [new file with mode: 0644]
configure.ac [new file with mode: 0644]
debian/changelog [new file with mode: 0644]
debian/packages [new file with mode: 0644]
doc/Makefile [new file with mode: 0644]
doc/config.txt [new file with mode: 0644]
doc/overview.txt [new file with mode: 0644]
doc/todo.txt [new file with mode: 0644]
doc/usage.txt [new file with mode: 0644]
etc/pgbouncer.ini [new file with mode: 0644]
etc/small.ini [new file with mode: 0644]
etc/test.ini [new file with mode: 0644]
etc/test.users [new file with mode: 0644]
etc/userlist.txt [new file with mode: 0644]
src/admin.c [new file with mode: 0644]
src/admin.h [new file with mode: 0644]
src/bouncer.h [new file with mode: 0644]
src/client.c [new file with mode: 0644]
src/client.h [new file with mode: 0644]
src/janitor.c [new file with mode: 0644]
src/janitor.h [new file with mode: 0644]
src/list.h [new file with mode: 0644]
src/loader.c [new file with mode: 0644]
src/loader.h [new file with mode: 0644]
src/main.c [new file with mode: 0644]
src/mbuf.h [new file with mode: 0644]
src/md5.c [new file with mode: 0644]
src/md5.h [new file with mode: 0644]
src/objects.c [new file with mode: 0644]
src/objects.h [new file with mode: 0644]
src/pktbuf.c [new file with mode: 0644]
src/pktbuf.h [new file with mode: 0644]
src/pooler.c [new file with mode: 0644]
src/pooler.h [new file with mode: 0644]
src/proto.c [new file with mode: 0644]
src/proto.h [new file with mode: 0644]
src/sbuf.c [new file with mode: 0644]
src/sbuf.h [new file with mode: 0644]
src/server.c [new file with mode: 0644]
src/server.h [new file with mode: 0644]
src/stats.c [new file with mode: 0644]
src/stats.h [new file with mode: 0644]
src/system.h [new file with mode: 0644]
src/takeover.c [new file with mode: 0644]
src/takeover.h [new file with mode: 0644]
src/util.c [new file with mode: 0644]
src/util.h [new file with mode: 0644]
test/Makefile [new file with mode: 0644]
test/asynctest.c [new file with mode: 0644]
test/stress.py [new file with mode: 0755]
test/test.ini [new file with mode: 0644]
test/test.sh [new file with mode: 0755]
test/userlist.txt [new file with mode: 0644]

diff --git a/AUTHORS b/AUTHORS
new file mode 100644 (file)
index 0000000..dfceaba
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,4 @@
+
+Marko Kreen <marko.kreen@skype.net>    - main coder
+Martin Pihlak <martin.pihlak@skype.net>        - head inquisitor
+
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644 (file)
index 0000000..5d264f7
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,16 @@
+PgBouncer - Lightweight connection pooler for PostgreSQL.
+
+Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..95cfe57
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,120 @@
+
+# source files (names relative to src/)
+SRCS = client.c loader.c objects.c pooler.c proto.c \
+       sbuf.c server.c util.c admin.c stats.c \
+       takeover.c md5.c janitor.c pktbuf.c main.c
+# header files (names relative to src/)
+HDRS = client.h loader.h objects.h pooler.h proto.h \
+       sbuf.h server.h util.h admin.h stats.h \
+       takeover.h md5.h janitor.h pktbuf.h bouncer.h \
+       list.h mbuf.h system.h
+
+# data & dirs to include in tgz
+DATA = README etc/pgbouncer.ini Makefile \
+       config.mak.in config.h.in configure configure.ac \
+       debian/packages debian/changelog
+DIRS = etc src debian
+
+# keep autoconf stuff separate; '-include' tolerates a missing file,
+# e.g. before ./configure has been run
+-include config.mak
+
+# derive full-path values from the plain file names above
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+hdrs = $(addprefix $(srcdir)/src/, $(HDRS))
+srcs = $(addprefix $(srcdir)/src/, $(SRCS))
+objs = $(addprefix $(builddir)/lib/, $(OBJS))
+FULL = $(PACKAGE_TARNAME)-$(PACKAGE_VERSION)
+DISTFILES = $(DIRS) $(DATA) $(srcs) $(hdrs)
+
+# Quiet by default: recipes only print a short tag such as "  CC file".
+# 'make V=1' echoes the full commands instead.
+V=0
+ifneq ($(V), 0)
+Q =
+E = @true
+else
+Q = @
+E = @echo
+endif
+
+## actual targets now ##
+
+# default target
+all: pgbouncer
+
+# these targets are commands, not files; without this declaration a stray
+# file named e.g. 'clean' or 'install' would turn the target into a no-op
+.PHONY: all install tgz deb clean distclean realclean boot check
+
+# link all objects into the final executable
+pgbouncer: config.mak $(objs)
+       $(E) "  LD" $@
+       $(Q) $(CC) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+# compile one source file into lib/; every object is rebuilt when any
+# header or config.mak changes
+$(builddir)/lib/%.o: $(srcdir)/src/%.c config.mak $(hdrs)
+       @mkdir -p $(@D)
+       $(E) "  CC" $<
+       $(Q) $(CC) -c -o $@ $< $(DEFS) $(CFLAGS) $(CPPFLAGS)
+
+# install the binary into bindir and the sample config into docdir
+install: pgbouncer
+       mkdir -p $(DESTDIR)$(bindir) $(DESTDIR)$(docdir)
+       $(BININSTALL) -m 755 $< $(DESTDIR)$(bindir)
+       $(INSTALL) -m 644 $(srcdir)/etc/pgbouncer.ini $(DESTDIR)$(docdir)
+
+# create tarfile: copy the dist files into a scratch directory named
+# after the release, pack it, then remove the scratch directory
+tgz: config.mak $(DISTFILES)
+       rm -rf $(FULL) $(FULL).tgz
+       mkdir $(FULL)
+       printf '%s\n' $(DISTFILES) | cpio -p $(FULL)
+       tar czf $(FULL).tgz $(FULL)
+       rm -rf $(FULL)
+
+# build the debian package (needs the generated configure script)
+deb: configure
+       yada rebuild
+       debuild -uc -us -b
+
+# remove the binary, object files, core dumps and editor backups
+clean:
+       rm -f pgbouncer core core.* *.o src/*.o lib/*.o *~ src/*~
+
+# additionally remove the results of ./configure
+distclean: clean
+       rm -rf lib autom4te*
+       rm -f config.mak config.status config.log config.h
+
+# additionally remove the autoconf results and the tags file
+realclean: distclean
+       rm -f tags aclocal* configure config.h.in depcomp install-sh missing
+
+# regenerate the configure script and config.h.in from a clean tree
+boot: distclean
+       autoreconf -i -f
+       rm -rf autom4te* config.h.in~
+
+# targets can depend on this to force ./configure; as a double-colon rule
+# without prerequisites its recipe is executed on every run
+config.mak::
+       @if test ! -f configure; then \
+                echo "Please run 'make boot && ./configure' first."; exit 1; \
+       fi
+       @if test ! -f $@; then echo "Please run ./configure first."; exit 1; fi
+
+# targets can depend on this to force 'make boot'
+configure::
+       @if test ! -f $@; then echo "Please run 'make boot' first."; exit 1; fi
+
+# create tags file; the libevent sources are indexed as well when a
+# checkout is found next to this tree
+tags: $(srcs) $(hdrs)
+       if test ! -f ../libevent/event.h; then \
+         ctags $^; \
+       else \
+         ctags $^ ../libevent/*.[ch]; \
+       fi
+
+# fixes for macos
+SPARSE_MACOS = -D__STDC_VERSION__=199901 -D__LP64__=0 -DSENDFILE=1 \
+               -I/usr/lib/gcc/i486-linux-gnu/4.1.2/include
+# sparse does not have any identity, so the platform macros are
+# supplied by hand
+SPARCE_FLAGS = -D__LITTLE_ENDIAN__ -D__i386__ \
+               -D__GNUC__=3 -D__GNUC_MINOR__=0 \
+               -Wno-transparent-union \
+               -Wall $(SPARSE_MACOS) $(CPPFLAGS) $(DEFS)
+
+# run the sparse static checker over all sources
+check: config.mak
+       $(E) "  CHECK" $(srcs)
+       $(Q) sparse $(SPARCE_FLAGS) $(srcs)
+
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..9fbdbd0
--- /dev/null
+++ b/README
@@ -0,0 +1,22 @@
+
+PgBouncer
+=========
+
+Lightweight connection pooler for PostgreSQL.
+
+Docs:   http://developer.skype.com/SkypeGarage/DbProjects/PgBouncer
+Source: http://pgfoundry.org/projects/pgbouncer
+
+Building
+---------
+
+PgBouncer uses libevent for low-level socket handling.  When this is
+installed just run:
+
+       $ ./configure --prefix=/usr/local --with-libevent=/prefix
+       $ make
+       $ make install
+
+If the OS does not have libevent available as package, it can be
+downloaded from http://monkey.org/~provos/libevent/
+
diff --git a/config.mak.in b/config.mak.in
new file mode 100644 (file)
index 0000000..7eb4677
--- /dev/null
@@ -0,0 +1,33 @@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+
+DEFS = @DEFS@
+LIBS = @LIBS@
+CC = @CC@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CFLAGS = @CFLAGS@
+LDFLAGS = @LDFLAGS@
+
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+bindir = @bindir@
+datarootdir = @datarootdir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+docdir = @docdir@
+
+top_builddir = @top_builddir@
+srcdir = @srcdir@
+abs_srcdir = @abs_srcdir@
+top_srcdir = @top_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+builddir = @builddir@
+abs_builddir = @abs_builddir@
+abs_top_builddir = @abs_top_builddir@
+
+# autoconf does not want to find 'install'
+# if im not using automake...
+INSTALL = @INSTALL@
+BININSTALL = @BININSTALL@
+
diff --git a/configure.ac b/configure.ac
new file mode 100644 (file)
index 0000000..9efbb56
--- /dev/null
@@ -0,0 +1,115 @@
+dnl Process this file with autoconf to produce a configure script.
+
+AC_INIT(pgbouncer, 1.0)
+AC_CONFIG_SRCDIR(src/bouncer.h)
+AC_CONFIG_HEADER(config.h)
+
+dnl Checks for programs.
+AC_PROG_CC
+AC_PROG_CPP
+
+dnl Additional gcc tuning: each warning switch is probed separately and
+dnl only the ones the compiler accepts are appended to CFLAGS
+if test x"$GCC" = xyes; then
+  AC_MSG_CHECKING([for working warning switches])
+  good_CFLAGS="$CFLAGS"
+  good="-Wall"
+  flags="-Wextra"
+  # turn off noise from Wextra
+  flags="$flags -Wno-unused-parameter -Wno-sign-compare"
+  flags="$flags -Wno-missing-field-initializers"
+  # Wextra does not turn those on?
+  flags="$flags -Wmissing-prototypes -Wpointer-arith -Wendif-labels"
+  flags="$flags -Wdeclaration-after-statement -Wold-style-definition"
+  flags="$flags -Wstrict-prototypes"
+  for f in $flags; do
+    # try the flags accepted so far plus the candidate
+    CFLAGS="$good_CFLAGS $good $f"
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE([[void foo(void){}]])], [good="$good $f"])
+  done
+  CFLAGS="$good_CFLAGS $good"
+  AC_MSG_RESULT([$good])
+fi
+
+dnl Checks for header files.
+AC_CHECK_HEADERS([crypt.h sys/socket.h sys/ucred.h])
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_INLINE
+AC_TYPE_PID_T
+AC_TYPE_SIZE_T
+AC_TYPE_UINT8_T
+AC_TYPE_UINT32_T
+AC_TYPE_UINT64_T
+AC_SYS_LARGEFILE
+
+dnl Checks for library functions.
+AC_CHECK_FUNCS(strlcpy strlcat)
+AC_SEARCH_LIBS(crypt, crypt, [], AC_MSG_ERROR([crypt not found]))
+
+dnl Find libevent: optionally take an install prefix from --with-libevent,
+dnl then verify that a small program using the library links
+AC_MSG_CHECKING([for libevent])
+AC_ARG_WITH(libevent,
+  AC_HELP_STRING([--with-libevent=prefix],[Specify where libevent is installed]),
+  [ test "$withval" = "no" && AC_MSG_ERROR([cannot work without libevent])
+    # a bare --with-libevent sets withval=yes which is not a directory;
+    # rely on the default search paths in that case
+    if test "$withval" != "yes"; then
+      CPPFLAGS="$CPPFLAGS -I$withval/include"
+      LDFLAGS="$LDFLAGS -L$withval/lib"
+    fi ])
+LIBS="$LIBS -levent"
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+  #include <sys/types.h>
+  #include <sys/time.h>
+  #include <stdio.h>
+  #include <event.h>
+  int main(void) {
+    struct event ev;
+    event_init();
+    event_set(&ev, 1, EV_READ, NULL, NULL);
+    return 0;
+  } ]])],
+[AC_MSG_RESULT([found])],
+[AC_MSG_ERROR([not found])])
+
+# autoconf does not want to find 'install', if not using automake...
+INSTALL=install
+
+AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug],[build binary with debugging symbols]))
+AC_MSG_CHECKING([whether to build debug binary])
+if test "$enable_debug" = "yes"; then
+  LDFLAGS="-g $LDFLAGS"
+  CFLAGS="`echo $CFLAGS | sed -e 's/-O2/-O/g'`"
+  BININSTALL="$INSTALL"
+  AC_MSG_RESULT([yes])
+else
+  if test x"$GCC" = xyes; then
+    CFLAGS="$CFLAGS -fomit-frame-pointer"
+  fi
+  BININSTALL="$INSTALL -s"
+  AC_MSG_RESULT([no])
+fi
+AC_SUBST(INSTALL)
+AC_SUBST(BININSTALL)
+
+AC_ARG_ENABLE(cassert, AC_HELP_STRING([--enable-cassert],[turn on assert checking in code]))
+AC_MSG_CHECKING([whether to enable asserts])
+if test "$enable_cassert" = "yes"; then
+  AC_DEFINE(CASSERT, 1, [Define to enable assert checking])
+  AC_MSG_RESULT([yes])
+else
+  AC_MSG_RESULT([no])
+fi
+
+AC_ARG_ENABLE(werror, AC_HELP_STRING([--enable-werror],[add -Werror to CFLAGS]))
+AC_MSG_CHECKING([whether to fail on warnings])
+if test "$enable_werror" = "yes"; then
+  CFLAGS="$CFLAGS -Werror"
+  AC_MSG_RESULT([yes])
+else
+  AC_MSG_RESULT([no])
+fi
+
+dnl Output findings
+AC_OUTPUT([config.mak])
+
+dnl If separate build dir, link Makefile over
+test -f Makefile || {
+  echo "Linking Makefile"
+  ln -s $srcdir/Makefile
+}
+
diff --git a/debian/changelog b/debian/changelog
new file mode 100644 (file)
index 0000000..da9cacc
--- /dev/null
@@ -0,0 +1,5 @@
+pgbouncer (1.0) unstable; urgency=low
+
+  * Public release.
+
+ -- Marko Kreen <marko.kreen@skype.net>  Tue, 13 Mar 2007 17:30:02 +0200
diff --git a/debian/packages b/debian/packages
new file mode 100644 (file)
index 0000000..aca0857
--- /dev/null
@@ -0,0 +1,27 @@
+## debian/packages for pgbouncer
+
+Source: pgbouncer
+Section: contrib/misc
+Priority: extra
+Maintainer: Marko Kreen <marko.kreen@skype.net>
+Standards-Version: 3.6.2
+Description: Lightweight connection pooler for PostgreSQL 
+Copyright: BSD
+ Copyright 2007 Marko Kreen, Skype Technologies
+Build: sh
+ CPPFLAGS="-I$HOME/src/libevent" \
+ LDFLAGS="-L$HOME/src/libevent/.libs" \
+ ./configure --prefix=/usr --enable-debug --enable-cassert
+ make V=1
+Clean: sh
+ make clean || true
+#Build-Depends: libevent-dev
+
+Package: pgbouncer
+Architecture: any
+Contains: unstripped
+Depends: []
+Description: Lightweight connection pooler for PostgreSQL
+ .
+Install: sh
+ make install DESTDIR=$ROOT
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644 (file)
index 0000000..68b19e1
--- /dev/null
@@ -0,0 +1,12 @@
+
+wiki = https://developer.skype.com/SkypeGarage/DbProjects/PgBouncer
+web = mkz@shell.pgfoundry.org:/home/pgfoundry.org/groups/pgbouncer/htdocs/
+
+# both targets are commands, not files
+.PHONY: all upload
+
+# nothing to build here
+all:
+
+# hand each doc file and its wiki URL to devupload.sh
+upload:
+       devupload.sh overview.txt $(wiki)
+       devupload.sh todo.txt $(wiki)/ToDo
+       devupload.sh usage.txt $(wiki)/UsageInfo
+       devupload.sh config.txt $(wiki)/ConfigFile
+
diff --git a/doc/config.txt b/doc/config.txt
new file mode 100644 (file)
index 0000000..b842ae4
--- /dev/null
@@ -0,0 +1,229 @@
+#pragma section-numbers 2
+
+= PgBouncer Config =
+
+[[TableOfContents]]
+
+Config file is in "ini" format.  Section names are between "[" and "]".
+Lines starting with ";" or "#" are taken as comment and ignored.  The characters
+";" and "#" are not recognized when they appear later in the line.
+
+
+
+== Section [pgbouncer] ==
+
+=== Generic settings ===
+
+==== logfile ====
+Specifies log file.  Logging is done by open/write/close, so it can be safely
+rotated, without informing the pooler.
+
+Default: not set.
+
+
+==== pidfile ====
+Specifies pid file.  Without pidfile, the daemonization is not allowed.
+
+Default: not set.
+
+
+==== listen_addr ====
+Specifies IPv4 address, where to listen for TCP connections.  Or "*"
+meaning "listen on all addresses".   When not set, only unix socket
+connections are allowed.
+
+Default: not set.
+
+==== listen_port ====
+Which port to listen on.  Applies to both TCP and Unix sockets.
+
+Default: 6000
+
+==== unix_socket_dir ====
+Specifies location for Unix sockets.  Applies to both listening socket
+and server connections.  If set to empty string, Unix sockets are disabled.
+
+Default: /tmp
+
+==== auth_file ====
+
+Load user names and passwords from this file.  File format used
+is same as for PostgreSQL pg_auth/pg_pwd file, so can be pointed
+directly to backend file.
+
+Default: not set.
+
+==== auth_type ====
+How to authenticate users.
+
+ md5::
+       Use MD5-based password check.  auth_file may contain both md5-encrypted
+       or plain-text passwords.
+
+ crypt::
+       Use crypt(3) based password check.  auth_file must contain plain-text
+       passwords.
+
+ plain::
+       Clear-text password is sent over wire.
+
+ trust::
+       No authentication is done.  Username must still exist in auth_file.
+
+ any::
+       Like `trust` but username given is ignored.  Requires that all databases
+       are configured to log in as a specific user.
+
+Default: md5
+
+==== pool_mode ====
+Specifies when server connection is tagged as reusable for other clients.
+
+ session::
+       Server is released back to pool after client disconnects.
+
+ transaction::
+       Server is released back to pool after transaction finishes.
+
+ statement::
+       Server is released back to pool after query finishes.  Long transactions
+       spanning multiple statements are disallowed in this mode.
+
+Default: `session`.
+
+==== max_client_conn ====
+
+Maximum number of client connections allowed.
+
+==== default_pool_size ====
+
+How many server connections to allow per user/database pair.
+Can be overridden in the per-database config.
+
+=== Console access control ===
+
+==== admin_users ====
+List of users that are allowed to run all commands on console.
+
+==== stats_users ====
+
+List of users that are allowed to run read-only queries on console.
+That means all SHOW commands except SHOW FDS.
+
+=== Connection sanity checks, timeouts ===
+
+==== server_check_delay ====
+
+How long to keep released server connections immediately available, without
+running a sanity-check query on them.  If 0, the query is always run.
+
+==== server_check_query ====
+
+Good variants are `SELECT 1;`, to just see if connection is alive
+and `ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT`
+to do full reset.
+
+If empty string, then sanity checking is disabled.
+
+==== server_lifetime ====
+
+Pooler tries to close server connections that have been connected
+longer than this.
+
+==== server_idle_timeout ====
+
+If a server connection has been idle longer than this, there are too many
+connections in the pool and this one can be dropped.
+
+==== server_connect_timeout ====
+
+If connection and login won't finish in this time, the connection will
+be closed.
+
+==== server_login_retry ====
+
+If login failed because of a failure from connect() or authentication,
+then the pooler waits this long before retrying to connect.
+
+==== query_timeout ====
+
+Queries running longer than that are canceled.  This should be used
+only with slightly smaller server-side statement_timeout, to apply only
+for network problems.
+
+Default: 0 (disabled)
+
+==== client_idle_timeout ====
+
+Client connections idling longer than that are closed.
+
+Default: 0 (disabled)
+
+=== Low-level network settings ===
+
+==== pkt_buf ====
+
+Internal buffer size for packets.  Affects size of TCP packets sent
+and general memory usage.  Actual libpq packets can be larger than this
+so no need to set it large.
+
+Default: 2048
+
+==== tcp_defer_accept ====
+
+Details about the following options can be found in `man 7 tcp`.
+
+Default: 45 on Linux, otherwise 0
+
+==== tcp_socket_buffer ====
+
+Default: not set
+
+==== tcp_keepalive ====
+
+Default: Not set
+
+==== tcp_keepcnt ====
+Default: not set
+
+==== tcp_keepidle ====
+Default: not set
+==== tcp_keepintvl ====
+Default: not set
+
+== Section [databases] ==
+
+This contains key=value pairs where key will be taken as database name and value as
+libpq-connstring style list of key=value pairs.  As actual libpq is not used,
+not all features from libpq can be used (service=, quoting).
+
+=== dbname ===
+
+Destination database name.
+
+Default: same as client-side database name.
+
+=== host ===
+
+IP-address to connect to.
+
+Default: not set, meaning to use unix-socket.
+
+=== port ===
+
+Default: 5432
+
+=== user, password ===
+
+If user= is set, all connections to destination database will be done
+with that user, meaning that there will be only one pool for this database.
+
+Otherwise pgbouncer tries to log into destination database with client username,
+meaning that there will be one pool per user.
+
+=== client_encoding, datestyle ===
+
+As pgbouncer does not pass client startup packet to server, there is no way of specifying
+startup parameters to dest database.  These parameters make it possible to set startup
+parameters in pgbouncer config.  Especially, client_encoding=UNICODE is needed to work
+around JDBC driver bug.
diff --git a/doc/overview.txt b/doc/overview.txt
new file mode 100644 (file)
index 0000000..9c04630
--- /dev/null
@@ -0,0 +1,44 @@
+
+= PgBouncer =
+
+Lightweight connection pooler for PostgreSQL.
+
+Downloads, bugtracker, CVS: http://pgfoundry.org/projects/pgbouncer
+
+== Features ==
+
+ * Several levels of brutality when rotating connections:
+
+  Session pooling::
+       Most polite method.  When client connects, a server connection
+       will be assigned to it for the whole duration it stays connected.
+       When client disconnects, the server connection will be put back
+       into pool.
+
+  Transaction pooling::
+       Server connection is assigned to client only during a transaction.
+       When PgBouncer notices that transaction is over, the server
+       will be put back into pool.
+
+  Statement pooling::
+       Most aggressive method.  The server connection will be put back into
+       pool immediately after a query completes.  Multi-statement
+       transactions are disallowed in this mode as they would break.
+
+ * Low memory requirements (2k per connection by default).  This is due
+ to the fact that PgBouncer does not need to see full packet at once.
+
+ * It is not tied to one backend server, the destination databases can
+ reside on different hosts.
+
+ * Supports online reconfiguration for most of the settings.
+
+ * Supports online restart - is able to transfer sockets to a new process.
+
+ * Supports protocol V3 only, so backend version must be >= 7.4.
+
+== Docs ==
+
+ * Detailed usage info: ./UsageInfo
+ * Config file help: ./ConfigFile
+ * TODO list: ./ToDo
diff --git a/doc/todo.txt b/doc/todo.txt
new file mode 100644 (file)
index 0000000..9e576de
--- /dev/null
@@ -0,0 +1,21 @@
+= PgBouncer TODO list =
+
+ * -R should detect that no pooler is running and boot normally
+ * -R should detect if login fails then exit()
+ * PAUSE <db>; RESUME <db>;
+
+ * keep stats about error counts?
+ * SHUTDOWN cmd should print notice?
+ * before loading users, disable all existing?
+
+ * log_connects, log_disconnects settings
+
+== Bugs ==
+
+ * Bouncer can get into situation where SUSPEND won't work (stalls),
+ thus making reboot impossible.
+
+ * Light load with small server_check_delay creates situation where
+ some clients may never get server connection.  Applied a hack to
+ fix this, needs more analysis.
+
diff --git a/doc/usage.txt b/doc/usage.txt
new file mode 100644 (file)
index 0000000..960800f
--- /dev/null
@@ -0,0 +1,153 @@
+#pragma section-numbers 2
+
+= PgBouncer usage details =
+
+[[TableOfContents]]
+
+== Building ==
+
+PgBouncer uses [http://monkey.org/~provos/libevent/ libevent]
+for low-level socket handling.  When this is installed just run:
+
+{{{
+$ ./configure --prefix=/usr/local --with-libevent=/prefix
+$ make
+$ make install
+}}}
+
+== Command line usage ==
+{{{
+pgbouncer [-d][-R][-v] config.ini
+pgbouncer -V|-h
+}}}
+
+Where switches are:
+
+ -d::
+       Run in background.  Without it the process will run in foreground.
+
+ -R::
+       Do an online restart.  That means connecting to running process,
+       loading open sockets from it and using them.
+
+ -v::
+       Increase verbosity.
+
+ -V::
+       Show version.
+
+ -h::
+       Show short help.
+
+
+== Admin Console ==
+
+There is always an extra database available: "pgbouncer".
+When connected to it, it is possible to look at and change
+pooler settings.
+
+{{{
+SHOW STATS;
+}}}
+Shows statistics.
+
+{{{
+SHOW SERVERS;
+SHOW CLIENTS;
+SHOW POOLS;
+SHOW LISTS;
+}}}
+Shows internal info.
+
+{{{
+SHOW USERS;
+SHOW DATABASES;
+}}}
+Shows loaded users and databases.
+
+{{{
+SHOW FDS;
+}}}
+Shows list of fds in use.  When the connected user has username
+"pgbouncer", connects thru unix socket and has same UID as running process
+the actual fds are passed over connection.  This mechanism is used
+to do online restart.
+
+{{{
+PAUSE;
+}}}
+PgBouncer tries to disconnect from all servers, first waiting for
+all queries to complete.  The command will not return before all is done.
+
+{{{
+SUSPEND;
+}}}
+
+All socket buffers are flushed and PgBouncer stops listening for data on them.
+The command will not return before all is done.
+
+{{{
+RESUME
+}}}
+
+Resume work from previous PAUSE or SUSPEND command.
+
+{{{
+SHUTDOWN
+}}}
+The PgBouncer process will exit.
+
+
+== Online restart ==
+
+PgBouncer supports restart without dropping connections.  When launched
+with switch "-R", it will connect to running PgBouncer process via
+unix socket and issues commands:
+
+{{{
+SUSPEND;
+SHOW FDS;
+SHUTDOWN;
+}}}
+Then it waits until old process shuts down and then starts listening on
+acquired sockets.
+
+== Signals ==
+
+ SIGHUP::
+       Reload config.
+
+ SIGINT::
+       Safe shutdown.
+
+ SIGTERM::
+       Immediate shutdown.
+
+== libevent settings ==
+
+From libevent docs:
+
+{{{
+It is possible to disable support for epoll, kqueue, devpoll, poll or select
+by setting the environment variable EVENT_NOEPOLL, EVENT_NOKQUEUE, EVENT_NODEVPOLL,
+EVENT_NOPOLL or EVENT_NOSELECT, respectively.  By setting the environment variable
+EVENT_SHOW_METHOD, libevent displays the kernel notification method that it uses.
+}}}
+
+== Authentication file format ==
+
+PgBouncer needs its own user database.  The users are loaded from
+text file that should be in same format as PostgreSQL's pg_auth/pg_pwd
+file.
+
+{{{
+"username1" "password" ...
+"username2" "md12342345234" ...
+}}}
+
+There should be at least 2 fields, surrounded by double quotes.  First
+is username and second either plain-text or md5-hashed password.
+PgBouncer ignores rest of the line.
+
+Such file format allows to direct PgBouncer directly to PostgreSQL
+user file under data directory.
diff --git a/etc/pgbouncer.ini b/etc/pgbouncer.ini
new file mode 100644 (file)
index 0000000..027ef28
--- /dev/null
@@ -0,0 +1,142 @@
+;; database name = connect string
+[databases]
+
+; foodb over unix socket
+foodb =
+
+; redirect bardb to bazdb on localhost
+bardb = host=127.0.0.1 dbname=bazdb
+
+; access to dest database will go with single user
+forcedb = host=127.0.0.1 port=300 user=baz password=foo client_encoding=UNICODE datestyle=ISO
+
+;; Configuration section
+[pgbouncer]
+
+;;;
+;;; Administrative settings
+;;;
+
+logfile = pgbouncer.log
+pidfile = pgbouncer.pid
+
+;;;
+;;; Where to wait for clients
+;;;
+
+; ip address or * which means all ip-s
+listen_addr = 127.0.0.1
+listen_port = 6000
+unix_socket_dir = /tmp
+
+;;;
+;;; Authentication settings
+;;;
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+#auth_file = 8.0/main/global/pg_auth
+auth_file = etc/userlist.txt
+
+;;;
+;;; Users allowed into database 'pgbouncer'
+;;;
+
+; comma-separated list of users, who are allowed to change settings
+admin_users = user2, someadmin, otheradmin
+
+; comma-separated list of users who are just allowed to use SHOW command
+stats_users = stats, root
+
+;;;
+;;; Pooler personality questions
+;;;
+
+; When server connection is released back to pool:
+;   session      - after client disconnects
+;   transaction  - after transaction finishes
+;   statement    - after statement finishes
+pool_mode = session
+
+; When taking idle server into use, this query is run first.
+;
+; Query for session pooling:
+;   ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT
+; Query for statement/transaction pooling:
+;   SELECT 1
+; Empty query disables the functionality
+server_check_query = select 1
+
+; If server was used more recently than this many seconds ago,
+; skip the check query.  If 0, the check query is always run.
+server_check_delay = 10
+
+;;;
+;;; Connection limits
+;;;
+
+; total number of clients that can connect
+max_client_conn = 100
+default_pool_size = 20
+
+;;;
+;;; Timeouts
+;;;
+
+;; Close server connection if it's been connected longer.
+;server_lifetime = 1200
+
+;; Close server connection if it's not been used in this time.
+;; Allows to clean unnecessary connections from pool after peak.
+;server_idle_timeout = 60
+
+;; Cancel connection attempt if server takes longer than this to answer.
+;server_connect_timeout = 15
+
+;; If server login failed (server_connect_timeout or auth failure)
+;; then wait this many seconds.
+;server_login_retry = 15
+
+;; Dangerous.  Server connection is closed if query does not return
+;; in this time.  Should be used to survive network problems,
+;; _not_ as statement_timeout. (default: 0)
+;query_timeout = 0
+
+;; Dangerous.  Client connection is closed if no activity in this time.
+;; Should be used to survive network problems. (default: 0)
+;client_idle_timeout = 0
+
+
+;;;
+;;; Low-level tuning options
+;;;
+
+;; buffer for streaming packets
+;pkt_buf = 2048
+
+;; networking options, for info: man 7 tcp
+
+;; linux: notify program about new connection only if there
+;; is also data received.  (Seconds to wait.)
+;; On Linux the default is 45, on other OS'es 0.
+;tcp_defer_accept = 0
+
+;; In-kernel buffer size (linux default: 4096)
+;tcp_socket_buffer = 0
+
+;; whether tcp keepalive should be turned on (0/1)
+;tcp_keepalive = 0
+
+;; following options are linux-specific.
+;; they also require tcp_keepalive=1
+
+;; count of keepalive packets
+;tcp_keepcnt = 0
+
+;; how long the connection can be idle,
+;; before sending keepalive packets
+;tcp_keepidle = 0
+
+;; The time between individual keepalive probes.
+;tcp_keepintvl = 0
+
diff --git a/etc/small.ini b/etc/small.ini
new file mode 100644 (file)
index 0000000..7692e9c
--- /dev/null
@@ -0,0 +1,30 @@
+
+[databases]
+evtest = host=127.0.0.1
+provider = host=127.0.0.1
+postgres = host=127.0.0.1
+orderdb = host=127.0.0.1
+forcedb = host=127.0.0.1 port=300 user=baz password=foo client_encoding=UNICODE datestyle=ISO
+marko = host=127.0.0.1 port=5432 pool_size=5
+orderdb_test = host=192.168.125.155
+test_part = host=127.0.0.1
+
+[pgbouncer]
+logfile = pgbouncer.log
+;pidfile = pgbouncer.pid
+
+listen_addr = 127.0.0.1
+listen_port = 6000
+unix_socket_dir = /tmp
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+#auth_file = 8.0/main/global/pg_auth
+auth_file = etc/userlist.txt
+
+; session, transaction, statement
+pool_mode = session
+
+max_client_conn = 100
+default_pool_size = 20
+
diff --git a/etc/test.ini b/etc/test.ini
new file mode 100644 (file)
index 0000000..503c31b
--- /dev/null
@@ -0,0 +1,31 @@
+[databases]
+marko = host=127.0.0.1
+
+[pgbouncer]
+logfile = lib/pgbouncer.log
+pidfile = lib/pgbouncer.pid
+
+#listen_addr = 127.0.0.1
+listen_port = 6000
+unix_socket_dir = /tmp
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+auth_file = etc/test.users
+
+; When server connection is released back to pool:
+;   session      - after client disconnects
+;   transaction  - after transaction finishes
+;   statement    - after statement finishes
+pool_mode = transaction
+
+server_check_query = select 1
+server_check_delay = 10
+max_client_conn = 2000
+default_pool_size = 80
+
+admin_users = plproxy
+stats_users = marko
+
+stats_period = 60
+
diff --git a/etc/test.users b/etc/test.users
new file mode 100644 (file)
index 0000000..5dd5911
--- /dev/null
@@ -0,0 +1,12 @@
+"admin" "" ""
+"backoffice" "" ""
+"info" "" ""
+"martinp" "md55c06ac8c93212495f8eaf6a7ffd688dd" ""
+"plproxy" "md5a704fc5c9a4bf2f745acc6f7a7ec2f2f" ""
+"postgres" "md5264abda62970ba635b133f545ce12132" ""
+"priitk" "md55c08f2e34592ddb13972db7eaadc1232" ""
+"replicator" "" ""
+"webstore" "" ""
+"wypbe" "md57e17e9c6cfde1c1f6f9155071d7d18a8" ""
+"wypfe" "md5e3b7c35f688032d97ab066210a33184b" ""
+"marko" "funky"
diff --git a/etc/userlist.txt b/etc/userlist.txt
new file mode 100644 (file)
index 0000000..166f46d
--- /dev/null
@@ -0,0 +1,3 @@
+"marko" "asdasd"
+"postgres" "asdasd"
+"pgbouncer" "fake"
diff --git a/src/admin.c b/src/admin.c
new file mode 100644 (file)
index 0000000..b6fd1a0
--- /dev/null
@@ -0,0 +1,888 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bouncer.h"
+
+#include <regex.h>
+
+/*
+ * regex elements
+ *
+ * Building blocks that are string-concatenated into the command patterns
+ * below.  WS0 is optional whitespace, WS1 is mandatory whitespace, WORD is
+ * a captured identifier/number and STRING is a captured single-quoted
+ * literal in which a quote is written as two quotes ('').
+ * WORD lists only lowercase letters; the patterns are compiled with
+ * REG_ICASE in admin_setup(), so uppercase input matches as well.
+ */
+#define WS0    "[ \t\n\r]*"
+#define WS1    "[ \t\n\r]+"
+#define WORD   "([0-9a-z_]+)"
+#define STRING "'(([^']*|'')*)'"
+
+/*
+ * possible max + 1
+ *
+ * Size of the regmatch_t array handed to regexec(): slot 0 is the whole
+ * match, the following slots are the capture groups.
+ */
+#define MAX_GROUPS 10
+
+/* group numbers: index of the capture group holding each argument */
+#define SHOW_ARG 1
+#define SET_KEY 1
+#define SET_VAL 2
+#define SINGLECMD 1
+
+/* SHOW: "show <word>;" - the word is optional in the pattern */
+static const char cmd_show_rx[] =
+"^" WS0 "show" WS1 WORD "?" WS0 ";" WS0 "$";
+
+/* SET with simple value: "set key = word;" */
+static const char cmd_set_word_rx[] =
+"^" WS0 "set" WS1 WORD WS0 "=" WS0 WORD WS0 ";" WS0 "$";
+
+/* SET with quoted value: "set key = 'string';" */
+static const char cmd_set_str_rx[] =
+"^" WS0 "set" WS1 WORD WS0 "=" WS0 STRING WS0 ";" WS0 "$";
+
+/*
+ * single word cmd: "word;"
+ *
+ * NOTE(review): unlike the patterns above there is no WS0 between WORD and
+ * ";", so "pause ;" does not match - confirm whether that is intended.
+ */
+static const char cmd_single_rx[] =
+"^" WS0 WORD ";" WS0 "$";
+
+/* compiled regexes, filled in by admin_setup() */
+static regex_t rc_show;
+static regex_t rc_set_word;
+static regex_t rc_set_str;
+static regex_t rc_single;
+
+/* pool of the fake "pgbouncer" database, set by admin_setup() */
+static PgPool *admin_pool;
+
+/*
+ * Report an error on the admin console.
+ *
+ * The printf-style message is formatted into a fixed 1024-byte buffer
+ * (longer text is truncated by vsnprintf), written to the log, and - when
+ * 'admin' is not NULL - also sent to that client via send_pooler_error().
+ *
+ * Returns the result of send_pooler_error(), or true when there was no
+ * client to notify.
+ */
+bool admin_error(PgSocket *admin, const char *fmt, ...)
+{
+       char msg[1024];
+       va_list args;
+
+       va_start(args, fmt);
+       vsnprintf(msg, sizeof(msg), fmt, args);
+       va_end(args);
+
+       log_error("%s", msg);
+
+       /* log-only call: nothing can fail on the wire */
+       if (!admin)
+               return true;
+
+       return send_pooler_error(admin, true, msg);
+}
+
+/*
+ * Finish a result set that was accumulated in 'buf'.
+ *
+ * Appends a CommandComplete packet carrying 'desc' and a ReadyForQuery
+ * packet, then passes the buffer to pktbuf_send_queued() for delivery to
+ * 'admin'.  No status is returned to the caller.
+ */
+void admin_flush(PgSocket *admin, PktBuf *buf, const char *desc)
+{
+       pktbuf_write_CommandComplete(buf, desc);
+       pktbuf_write_ReadyForQuery(buf);
+       pktbuf_send_queued(buf, admin);
+}
+
+/*
+ * Answer a command that returns no rows.
+ *
+ * Builds CommandComplete('desc') + ReadyForQuery in a 512-byte stack
+ * buffer and sends it right away.  Returns whatever
+ * pktbuf_send_immidiate() reports.
+ */
+bool admin_ready(PgSocket *admin, const char *desc)
+{
+       uint8 storage[512];
+       PktBuf reply;
+
+       pktbuf_static(&reply, storage, sizeof(storage));
+
+       pktbuf_write_CommandComplete(&reply, desc);
+       pktbuf_write_ReadyForQuery(&reply);
+
+       return pktbuf_send_immidiate(&reply, admin);
+}
+
+/*
+ * Command: SET key = val;
+ *
+ * Only connections flagged admin_user may change settings.  The new value
+ * is applied through set_config_param(); on success the client receives a
+ * "SET key=val" completion tag, otherwise an error message.
+ */
+static bool admin_set(PgSocket *admin, const char *key, const char *val)
+{
+       char tag[512];
+
+       if (!admin->admin_user)
+               return admin_error(admin, "admin access needed");
+
+       if (!set_config_param(bouncer_params, key, val, true, admin))
+               return admin_error(admin, "SET failed");
+
+       snprintf(tag, sizeof(tag), "SET %s=%s", key, val);
+       return admin_ready(admin, tag);
+}
+
+/*
+ * Send one result row describing a socket to the admin client with
+ * sendmsg(), optionally attaching the descriptor itself.
+ *
+ * The row is built into a local buffer by BUILD_DataRow with the format
+ * "issssiqi" (fd, task, user, db, addr, port, ckey, link).  When the admin
+ * connection is a unix socket and is flagged own_user, 'fd' is also
+ * attached as SCM_RIGHTS ancillary data.
+ *
+ * Returns false when BUILD_DataRow reports a negative length, when
+ * sendmsg fails, or when fewer bytes than the row length were sent;
+ * true otherwise.
+ */
+static bool send_one_fd(PgSocket *admin,
+                       int fd, const char *task,
+                       const char *user, const char *db,
+                       const char *addr, int port,
+                       uint64 ckey, int link)
+{
+       struct msghdr msg;
+       struct cmsghdr *cmsg;
+       int res;
+       struct iovec iovec;
+       uint8 pktbuf[1024];
+       uint8 cntbuf[CMSG_SPACE(sizeof(int))];
+
+       iovec.iov_base = pktbuf;
+       BUILD_DataRow(res, pktbuf, sizeof(pktbuf), "issssiqi",
+                     fd, task, user, db, addr, port, ckey, link);
+       if (res < 0)
+               return false;
+       iovec.iov_len = res;
+
+       /* sending fds */
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_iov = &iovec;
+       msg.msg_iovlen = 1;
+
+       /* attach a fd */
+       if (admin->addr.is_unix && admin->own_user) {
+               /* do not hand uninitialized padding bytes to the kernel */
+               memset(cntbuf, 0, sizeof(cntbuf));
+
+               msg.msg_control = cntbuf;
+               msg.msg_controllen = sizeof(cntbuf);
+
+               cmsg = CMSG_FIRSTHDR(&msg);
+               cmsg->cmsg_level = SOL_SOCKET;
+               cmsg->cmsg_type = SCM_RIGHTS;
+               cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+
+               memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+               msg.msg_controllen = cmsg->cmsg_len;
+       }
+
+       /* msg_controllen is not an int on every platform: cast for %d */
+       slog_debug(admin, "sending socket list: fd=%d, len=%d",
+                  fd, (int)msg.msg_controllen);
+       res = safe_sendmsg(sbuf_socket(&admin->sbuf), &msg, 0);
+       if (res < 0) {
+               log_error("send_one_fd: sendmsg error: %s", strerror(errno));
+               return false;
+       } else if ((size_t)res != iovec.iov_len) {
+               /* res is known to be >= 0 here, so the cast is safe */
+               log_error("send_one_fd: partial sendmsg");
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Describe one client or server socket and pass it to send_one_fd().
+ *
+ * Missing user or pool is reported as NULL, a missing link as fd 0, and
+ * the 8-byte cancel key is read as a single 64-bit value.
+ */
+static bool show_one_fd(PgSocket *admin, PgSocket *sk)
+{
+       PgAddr *addr = &sk->addr;
+       MBuf keybuf;
+       const char *task;
+       const char *user = NULL;
+       const char *db = NULL;
+       const char *host;
+       int link_fd = 0;
+
+       mbuf_init(&keybuf, sk->cancel_key, 8);
+
+       if (is_server_socket(sk))
+               task = "server";
+       else
+               task = "client";
+
+       if (sk->auth_user)
+               user = sk->auth_user->name;
+       if (sk->pool)
+               db = sk->pool->db->name;
+
+       if (addr->is_unix)
+               host = "unix";
+       else
+               host = inet_ntoa(addr->ip_addr);
+
+       if (sk->link)
+               link_fd = sbuf_socket(&sk->link->sbuf);
+
+       return send_one_fd(admin, sbuf_socket(&sk->sbuf), task, user, db,
+                          host, addr->port, mbuf_get_uint64(&keybuf),
+                          link_fd);
+}
+
+/*
+ * Send rows for the pooler's own listening sockets.
+ *
+ * get_pooler_fds() supplies the network and unix-socket descriptors; a
+ * value of zero is treated as "not present" and skipped.  Stops at the
+ * first failed send and returns false; returns true otherwise.
+ */
+static bool show_pooler_fds(PgSocket *admin)
+{
+       int fd_net, fd_unix;
+
+       get_pooler_fds(&fd_net, &fd_unix);
+
+       if (fd_net && !send_one_fd(admin, fd_net, "pooler", NULL, NULL,
+                                  cf_listen_addr, cf_listen_port, 0, 0))
+               return false;
+
+       if (!fd_unix)
+               return true;
+
+       return send_one_fd(admin, fd_unix, "pooler", NULL, NULL,
+                          "unix", cf_listen_port, 0, 0);
+}
+
+/*
+ * Send one row per socket on 'list' via show_one_fd().
+ *
+ * Returns false as soon as one send fails, true when every entry was
+ * sent (including the case of an empty list).
+ */
+static bool show_fds_from_list(PgSocket *admin, StatList *list)
+{
+       List *node;
+
+       statlist_for_each(node, list) {
+               if (!show_one_fd(admin, container_of(node, PgSocket, head)))
+                       return false;
+       }
+       return true;
+}
+
+/*
+ * Command: SHOW FDS
+ *
+ * Sends one row per pooler listening socket, per client still logging in
+ * and per client/server socket of every non-admin pool.
+ * If privileged connection, send also actual fds
+ * (see send_one_fd(): unix socket + own_user).
+ *
+ * Returns false if any part of the result set could not be sent.
+ */
+static bool admin_show_fds(PgSocket *admin)
+{
+       List *item;
+       PgPool *pool;
+       bool res;
+
+       /*
+        * Dangerous to show to everybody:
+        * - can lock pooler as code flips async option
+        * - show cancel keys for all users
+        */
+       if (!admin->admin_user)
+               return admin_error(admin, "admin access needed");
+
+       /*
+        * It's very hard to send it reliably over in async manner,
+        * so turn async off for this resultset.
+        */
+       socket_set_nonblocking(sbuf_socket(&admin->sbuf), 0);
+
+       /*
+        * send resultset
+        *
+        * SEND_RowDescription stores its outcome in 'res'; every later
+        * step is skipped once 'res' has become false.
+        */
+       SEND_RowDescription(res, admin, "issssiqi",
+                                "fd", "task",
+                                "user", "database",
+                                "addr", "port",
+                                "cancel", "link");
+       if (res)
+               res = show_pooler_fds(admin);
+
+       if (res)
+               res = show_fds_from_list(admin, &login_client_list);
+
+       statlist_for_each(item, &pool_list) {
+               pool = container_of(item, PgPool, head);
+               /* sockets of the admin pool itself are not listed */
+               if (pool->admin)
+                       continue;
+               res = res && show_fds_from_list(admin, &pool->active_client_list);
+               res = res && show_fds_from_list(admin, &pool->waiting_client_list);
+               res = res && show_fds_from_list(admin, &pool->active_server_list);
+               res = res && show_fds_from_list(admin, &pool->idle_server_list);
+               res = res && show_fds_from_list(admin, &pool->used_server_list);
+               res = res && show_fds_from_list(admin, &pool->tested_server_list);
+               res = res && show_fds_from_list(admin, &pool->new_server_list);
+               if (!res)
+                       break;
+       }
+       if (res)
+               res = admin_ready(admin, "SHOW");
+
+       /* turn async back on, whether or not sending succeeded */
+       socket_set_nonblocking(sbuf_socket(&admin->sbuf), 1);
+
+       return res;
+}
+
+/*
+ * Command: SHOW DATABASES
+ *
+ * One row per entry of database_list: name, host, port, database,
+ * force_user, pool_size.  Host is NULL for unix-socket databases and
+ * force_user is NULL when no user is forced.
+ *
+ * Always returns true; an allocation failure is only reported to the
+ * client.
+ */
+static bool admin_show_databases(PgSocket *admin)
+{
+       List *node;
+       PktBuf *buf = pktbuf_dynamic(256);
+
+       if (!buf) {
+               admin_error(admin, "no mem");
+               return true;
+       }
+
+       pktbuf_write_RowDescription(buf, "ssissi",
+                                   "name", "host", "port",
+                                   "database", "force_user", "pool_size");
+
+       statlist_for_each(node, &database_list) {
+               PgDatabase *db = container_of(node, PgDatabase, head);
+               char *host = NULL;
+               const char *f_user = NULL;
+
+               if (!db->addr.is_unix)
+                       host = inet_ntoa(db->addr.ip_addr);
+               if (db->forced_user)
+                       f_user = db->forced_user->name;
+
+               pktbuf_write_DataRow(buf, "ssissi",
+                                    db->name, host, db->addr.port,
+                                    db->dbname, f_user,
+                                    db->pool_size);
+       }
+
+       admin_flush(admin, buf, "SHOW");
+       return true;
+}
+
+
+/*
+ * Command: SHOW LISTS
+ *
+ * Reports the item count of the global object lists as (list, items)
+ * rows.  Always returns true; an allocation failure is only reported to
+ * the client.
+ */
+static bool admin_show_lists(PgSocket *admin)
+{
+       PktBuf *buf = pktbuf_dynamic(256);
+       if (!buf) {
+               admin_error(admin, "no mem");
+               return true;
+       }
+       pktbuf_write_RowDescription(buf, "si", "list", "items");
+#define SENDLIST(name, size) pktbuf_write_DataRow(buf, "si", (name), (size))
+       SENDLIST("databases", statlist_count(&database_list));
+       SENDLIST("users", statlist_count(&user_list));
+       SENDLIST("pools", statlist_count(&pool_list));
+       SENDLIST("free_clients", statlist_count(&free_client_list));
+       SENDLIST("used_clients", get_active_client_count());
+       SENDLIST("login_clients", statlist_count(&login_client_list));
+       SENDLIST("free_servers", statlist_count(&free_server_list));
+       SENDLIST("used_servers", get_active_server_count());
+       /* helper is local to this function: do not leak it into the file */
+#undef SENDLIST
+       admin_flush(admin, buf, "SHOW");
+       return true;
+}
+
+/*
+ * Command: SHOW USERS
+ *
+ * Emits a single-column result set with the name of every entry on
+ * user_list.  Always returns true; an allocation failure is only
+ * reported to the client.
+ */
+static bool admin_show_users(PgSocket *admin)
+{
+       List *node;
+       PktBuf *buf;
+
+       buf = pktbuf_dynamic(256);
+       if (!buf) {
+               admin_error(admin, "no mem");
+               return true;
+       }
+
+       pktbuf_write_RowDescription(buf, "s", "name");
+
+       statlist_for_each(node, &user_list) {
+               PgUser *user = container_of(node, PgUser, head);
+
+               pktbuf_write_DataRow(buf, "s", user->name);
+       }
+
+       admin_flush(admin, buf, "SHOW");
+       return true;
+}
+
+/*
+ * Helper for SHOW CLIENTS
+ *
+ * Appends one row per socket on 'list' to 'buf': user, database, the
+ * given 'state' label, address ("unix" for unix sockets), port,
+ * connect time and request time.
+ */
+static void show_client_list(PktBuf *buf, StatList *list, const char *state)
+{
+       List *node;
+
+       statlist_for_each(node, list) {
+               PgSocket *cl = container_of(node, PgSocket, head);
+               const char *host;
+
+               if (cl->addr.is_unix)
+                       host = "unix";
+               else
+                       host = inet_ntoa(cl->addr.ip_addr);
+
+               pktbuf_write_DataRow(buf, "ssssiTT",
+                                    cl->auth_user->name,
+                                    cl->pool->db->name,
+                                    state, host, cl->addr.port,
+                                    cl->connect_time,
+                                    cl->request_time);
+       }
+}
+
+/*
+ * Command: SHOW CLIENTS
+ *
+ * Walks every pool and lists its active and waiting clients.  Always
+ * returns true; an allocation failure is only reported to the client.
+ */
+static bool admin_show_clients(PgSocket *admin)
+{
+       List *node;
+       PktBuf *buf;
+
+       buf = pktbuf_dynamic(256);
+       if (!buf) {
+               admin_error(admin, "no mem");
+               return true;
+       }
+
+       /* todo: age? query stats? */
+       pktbuf_write_RowDescription(buf, "ssssiTT",
+                                   "user", "database", "state",
+                                   "addr", "port", "connect_time", "request_time");
+
+       statlist_for_each(node, &pool_list) {
+               PgPool *pool = container_of(node, PgPool, head);
+
+               show_client_list(buf, &pool->active_client_list, "active");
+               show_client_list(buf, &pool->waiting_client_list, "waiting");
+       }
+
+       admin_flush(admin, buf, "SHOW");
+       return true;
+}
+
+/*
+ * Helper for SHOW SERVERS
+ *
+ * Appends one row per socket on 'list' to 'buf'.  Column order is: user
+ * name, database name, the given 'state' label, address ("unix" for unix
+ * sockets), port, connect time, request time.
+ */
+static void show_server_list(PktBuf *buf, StatList *list, const char *state)
+{
+       List *node;
+
+       statlist_for_each(node, list) {
+               PgSocket *sv = container_of(node, PgSocket, head);
+               const char *host;
+
+               if (sv->addr.is_unix)
+                       host = "unix";
+               else
+                       host = inet_ntoa(sv->addr.ip_addr);
+
+               pktbuf_write_DataRow(buf, "ssssiTT",
+                                    sv->auth_user->name,
+                                    sv->pool->db->name,
+                                    state, host, sv->addr.port,
+                                    sv->connect_time,
+                                    sv->request_time);
+       }
+}
+
+/*
+ * Command: SHOW SERVERS
+ *
+ * Walks every pool and lists its active, idle, used and tested server
+ * connections.  Always returns true; an allocation failure is only
+ * reported to the client.
+ */
+static bool admin_show_servers(PgSocket *admin)
+{
+       List *item;
+       PgPool *pool;
+       PktBuf *buf;
+
+       buf = pktbuf_dynamic(256);
+       if (!buf) {
+               admin_error(admin, "no mem");
+               return true;
+       }
+       /*
+        * Column labels must follow the order in which show_server_list()
+        * writes the values: user name first, database name second.
+        */
+       pktbuf_write_RowDescription(buf, "ssssiTT",
+                                   "user", "database", "state",
+                                   "addr", "port", "connect_time", "request_time");
+       /* todo: age? query stats */
+
+       statlist_for_each(item, &pool_list) {
+               pool = container_of(item, PgPool, head);
+               show_server_list(buf, &pool->active_server_list, "active");
+               show_server_list(buf, &pool->idle_server_list, "idle");
+               show_server_list(buf, &pool->used_server_list, "used");
+               show_server_list(buf, &pool->tested_server_list, "tested");
+       }
+       admin_flush(admin, buf, "SHOW");
+       return true;
+}
+
+/*
+ * Command: SHOW POOLS
+ *
+ * One row per pool: database and user name followed by the item counts
+ * of the pool's client lists (active, waiting) and server lists (active,
+ * idle, used, tested, new/login).  Always returns true; an allocation
+ * failure is only reported to the client.
+ */
+static bool admin_show_pools(PgSocket *admin)
+{
+       List *node;
+       PktBuf *buf = pktbuf_dynamic(256);
+
+       if (!buf) {
+               admin_error(admin, "no mem");
+               return true;
+       }
+
+       pktbuf_write_RowDescription(buf, "ssiiiiiii",
+                                   "database", "user",
+                                   "cl_active", "cl_waiting",
+                                   "sv_active", "sv_idle",
+                                   "sv_used", "sv_tested",
+                                   "sv_login");
+
+       statlist_for_each(node, &pool_list) {
+               PgPool *p = container_of(node, PgPool, head);
+
+               pktbuf_write_DataRow(buf, "ssiiiiiii",
+                                    p->db->name, p->user->name,
+                                    statlist_count(&p->active_client_list),
+                                    statlist_count(&p->waiting_client_list),
+                                    statlist_count(&p->active_server_list),
+                                    statlist_count(&p->idle_server_list),
+                                    statlist_count(&p->used_server_list),
+                                    statlist_count(&p->tested_server_list),
+                                    statlist_count(&p->new_server_list));
+       }
+
+       admin_flush(admin, buf, "SHOW");
+       return true;
+}
+
+/*
+ * Command: SHOW CONFIG
+ *
+ * Lists every entry of bouncer_params up to the terminating entry whose
+ * name is NULL: key, current value as text, and "yes"/"no" depending on
+ * the entry's reloadable flag.  Always returns true; an allocation
+ * failure is only reported to the client.
+ */
+static bool admin_show_config(PgSocket *admin)
+{
+       ConfElem *cf;
+       PktBuf *buf = pktbuf_dynamic(256);
+
+       if (!buf) {
+               admin_error(admin, "no mem");
+               return true;
+       }
+
+       pktbuf_write_RowDescription(buf, "sss", "key", "value", "changeable");
+
+       for (cf = &bouncer_params[0]; cf->name; cf++) {
+               const char *changeable = "no";
+
+               if (cf->reloadable)
+                       changeable = "yes";
+
+               pktbuf_write_DataRow(buf, "sss",
+                                    cf->name, conf_to_text(cf),
+                                    changeable);
+       }
+
+       admin_flush(admin, buf, "SHOW");
+       return true;
+}
+
+/*
+ * Command: RELOAD
+ *
+ * Admin-only.  Logs the request, calls load_config(true) and answers
+ * with a "RELOAD" completion tag.
+ */
+static bool admin_cmd_reload(PgSocket *admin)
+{
+       if (!admin->admin_user)
+               return admin_error(admin, "admin access needed");
+
+       log_info("RELOAD command issued");
+       load_config(true);
+       return admin_ready(admin, "RELOAD");
+}
+
+/*
+ * Command: SHUTDOWN
+ *
+ * Admin-only.  Logs the request and terminates the process with
+ * exit(0); no reply is sent to the client before exiting.
+ */
+static bool admin_cmd_shutdown(PgSocket *admin)
+{
+       if (!admin->admin_user)
+               return admin_error(admin, "admin access needed");
+
+       log_info("SHUTDOWN command issued");
+       exit(0);
+       /* not reached */
+       return true;
+}
+
+/*
+ * Command: RESUME
+ *
+ * Admin-only.  Clears cf_pause_mode.  If the previous mode was 2 (set by
+ * SUSPEND) resume_all() is called first; for modes 1 and 2 the client
+ * gets a "RESUME" completion tag, for any other previous mode an error.
+ */
+static bool admin_cmd_resume(PgSocket *admin)
+{
+       int prev_mode = cf_pause_mode;
+
+       if (!admin->admin_user)
+               return admin_error(admin, "admin access needed");
+
+       log_info("RESUME command issued");
+       cf_pause_mode = 0;
+
+       if (prev_mode != 1 && prev_mode != 2)
+               return admin_error(admin, "Pooler is not paused/suspended");
+
+       if (prev_mode == 2)
+               resume_all();
+
+       return admin_ready(admin, "RESUME");
+}
+
+/*
+ * Command: SUSPEND
+ *
+ * Admin-only; refused when a pause or suspend is already in effect.
+ * Sets cf_pause_mode to 2, marks this connection as waiting for a
+ * response and calls suspend_pooler().  No reply is sent here; the
+ * "SUSPEND" completion tag is sent by admin_pause_done().
+ */
+static bool admin_cmd_suspend(PgSocket *admin)
+{
+       if (!admin->admin_user)
+               return admin_error(admin, "admin access needed");
+
+       if (cf_pause_mode)
+               return admin_error(admin, "already suspended/paused");
+
+       log_info("SUSPEND command issued");
+       cf_pause_mode = 2;
+       admin->wait_for_response = 1;
+       suspend_pooler();
+
+       return true;
+}
+
+/*
+ * Command: PAUSE
+ *
+ * Admin-only; refused when a pause or suspend is already in effect.
+ * Sets cf_pause_mode to 1 and marks this connection as waiting for a
+ * response.  No reply is sent here; the "PAUSE" completion tag is sent
+ * by admin_pause_done().
+ */
+static bool admin_cmd_pause(PgSocket *admin)
+{
+       if (!admin->admin_user)
+               return admin_error(admin, "admin access needed");
+
+       if (cf_pause_mode)
+               return admin_error(admin, "already suspended/paused");
+
+       log_info("PAUSE command issued");
+       cf_pause_mode = 1;
+       admin->wait_for_response = 1;
+
+       return true;
+}
+
+/*
+ * extract substring from regex group
+ *
+ * Copies the text matched by group 'gnum' of 'src' into 'dst' as a
+ * NUL-terminated string.  'dst' becomes the empty string when the group
+ * did not take part in the match (regexec reports offsets of -1) or when
+ * the text plus terminator does not fit into 'dstmax' bytes.  Nothing is
+ * written when 'dstmax' is not positive.
+ */
+static void copy_arg(const char *src, regmatch_t *glist,
+                    int gnum, char *dst, int dstmax)
+{
+       regmatch_t *g = &glist[gnum];
+       regoff_t len;
+
+       /* no room even for the terminator */
+       if (dstmax <= 0)
+               return;
+
+       /* unmatched group: never form a pointer before 'src' */
+       if (g->rm_so < 0 || g->rm_eo < g->rm_so) {
+               dst[0] = 0;
+               return;
+       }
+
+       len = g->rm_eo - g->rm_so;
+       if (len < dstmax)
+               memcpy(dst, src + g->rm_so, (size_t)len);
+       else
+               len = 0;
+       dst[len] = 0;
+}
+
+/*
+ * extract quoted substring from regex group
+ *
+ * Copies the text matched by group 'gnum' of 'str' into 'dst' as a
+ * NUL-terminated string, turning each doubled quote ('') into a single
+ * one.  'dst' becomes the empty string when the group did not take part
+ * in the match or when the raw text plus terminator does not fit into
+ * 'dstmax' bytes.  Nothing is written when 'dstmax' is not positive.
+ */
+static void copy_arg_unquote(const char *str, regmatch_t *glist,
+                            int gnum, char *dst, int dstmax)
+{
+       regmatch_t *g = &glist[gnum];
+       const char *src;
+       const char *end;
+
+       /* no room even for the terminator */
+       if (dstmax <= 0)
+               return;
+
+       /* unmatched group, or raw text too long: result is empty */
+       if (g->rm_so < 0 || g->rm_eo < g->rm_so
+           || g->rm_eo - g->rm_so >= dstmax) {
+               *dst = 0;
+               return;
+       }
+
+       src = str + g->rm_so;
+       end = str + g->rm_eo;
+       while (src < end) {
+               /* look at src[1] only while it is still inside the group */
+               if (src[0] == '\'' && src + 1 < end && src[1] == '\'') {
+                       *dst++ = '\'';
+                       src += 2;
+               } else
+                       *dst++ = *src++;
+       }
+       *dst = 0;
+}
+
+/*
+ * Command: SHOW HELP
+ *
+ * Sends the console usage text as a notice ('N' packet built from
+ * severity, code, message and detail fields), followed by the usual
+ * "SHOW" completion.  The SHOW argument list names every keyword that
+ * admin_parse_query() accepts.
+ */
+static bool admin_show_help(PgSocket *admin)
+{
+       bool res;
+       SEND_generic(res, admin, 'N',
+               "sssss",
+               "SNOTICE", "C00000", "MConsole usage",
+               "D\n\tSHOW [HELP|CONFIG|DATABASES|FDS"
+               "|POOLS|CLIENTS|SERVERS|USERS|LISTS|STATS|VERSION]\n"
+               "\tSET key = arg\n"
+               "\tRELOAD\n"
+               "\tPAUSE\n"
+               "\tSUSPEND\n"
+               "\tRESUME\n"
+               "\tSHUTDOWN", "");
+       if (res)
+               res = admin_ready(admin, "SHOW");
+       return res;
+}
+
+/*
+ * Command: SHOW VERSION
+ *
+ * Sends "PgBouncer version <PACKAGE_VERSION>" as a notice ('N' packet),
+ * followed by the usual "SHOW" completion.  SEND_generic stores its
+ * outcome in 'res'.
+ */
+static bool admin_show_version(PgSocket *admin)
+{
+       bool res;
+       SEND_generic(res, admin, 'N',
+               "ssss", "SNOTICE", "C00000",
+               "MPgBouncer version " PACKAGE_VERSION, "");
+       if (res)
+               res = admin_ready(admin, "SHOW");
+       return res;
+}
+
+/*
+ * handle user query
+ *
+ * Matches the query text against the compiled command regexes in this
+ * order: SHOW, SET with quoted value, SET with plain word, single-word
+ * command.  The matching handler's result becomes the return value.
+ * When a handler returns false the admin client is disconnected.
+ */
+static bool admin_parse_query(PgSocket *admin, const char *q)
+{
+       regmatch_t grp[MAX_GROUPS];
+       char key[64];
+       char val[256];
+       bool res = true;
+
+       if (regexec(&rc_show, q, MAX_GROUPS, grp, 0) == 0) {
+               /* key is empty when the argument is missing or too long */
+               copy_arg(q, grp, SHOW_ARG, key, sizeof(key));
+               if (strcasecmp(key, "help") == 0) {
+                       res = admin_show_help(admin);
+               } else if (strcasecmp(key, "stats") == 0) {
+                       res = admin_database_stats(admin, &pool_list);
+               } else if (strcasecmp(key, "config") == 0) {
+                       res = admin_show_config(admin);
+               } else if (strcasecmp(key, "databases") == 0) {
+                       res = admin_show_databases(admin);
+               } else if (strcasecmp(key, "users") == 0) {
+                       res = admin_show_users(admin);
+               } else if (strcasecmp(key, "pools") == 0) {
+                       res = admin_show_pools(admin);
+               } else if (strcasecmp(key, "clients") == 0) {
+                       res = admin_show_clients(admin);
+               } else if (strcasecmp(key, "servers") == 0) {
+                       res = admin_show_servers(admin);
+               } else if (strcasecmp(key, "lists") == 0) {
+                       res = admin_show_lists(admin);
+               } else if (strcasecmp(key, "fds") == 0) {
+                       res = admin_show_fds(admin);
+               } else if (strcasecmp(key, "version") == 0) {
+                       res = admin_show_version(admin);
+               } else
+                       res = admin_error(admin, "bad SHOW arg, use SHOW HELP");
+       } else if (regexec(&rc_set_str, q, MAX_GROUPS, grp, 0) == 0) {
+               /* quoted value: doubled quotes are collapsed while copying */
+               copy_arg(q, grp, SET_KEY, key, sizeof(key));
+               copy_arg_unquote(q, grp, SET_VAL, val, sizeof(val));
+               if (!key[0] || !val[0]) {
+                       res = admin_error(admin, "bad arguments");
+               } else
+                       res = admin_set(admin, key, val);
+       } else if (regexec(&rc_set_word, q, MAX_GROUPS, grp, 0) == 0) {
+               copy_arg(q, grp, SET_KEY, key, sizeof(key));
+               copy_arg(q, grp, SET_VAL, val, sizeof(val));
+               if (!key[0] || !val[0]) {
+                       res = admin_error(admin, "bad arguments");
+               } else
+                       res = admin_set(admin, key, val);
+       } else if (regexec(&rc_single, q, MAX_GROUPS, grp, 0) == 0) {
+               copy_arg(q, grp, SINGLECMD, key, sizeof(key));
+               if (strcasecmp(key, "SHUTDOWN") == 0)
+                       res = admin_cmd_shutdown(admin);
+               else if (strcasecmp(key, "SUSPEND") == 0)
+                       res = admin_cmd_suspend(admin);
+               else if (strcasecmp(key, "PAUSE") == 0)
+                       res = admin_cmd_pause(admin);
+               else if (strcasecmp(key, "RESUME") == 0)
+                       res = admin_cmd_resume(admin);
+               else if (strcasecmp(key, "RELOAD") == 0)
+                       res = admin_cmd_reload(admin);
+               else
+                       res = admin_error(admin, "unknown command: %s", q);
+       } else
+               res = admin_error(admin, "unknown cmd: %s", q);
+
+       /* a false result means the reply could not be delivered */
+       if (!res)
+               disconnect_client(admin, true, "failure");
+       return res;
+}
+
+/*
+ * handle packets
+ *
+ * Entry point for packets arriving from an admin client.  Only complete
+ * packets are accepted.  'Q' runs the query through admin_parse_query()
+ * and, on success, schedules the packet to be skipped; 'X' closes the
+ * connection; every other type is answered with an error and a
+ * disconnect.
+ *
+ * Returns true only when a query was handled successfully.
+ */
+bool admin_handle_client(PgSocket *admin, MBuf *pkt, int pkt_type, int pkt_len)
+{
+       const char *query;
+
+       /* dont tolerate partial packets */
+       if (mbuf_avail(pkt) < pkt_len - 5) {
+               disconnect_client(admin, true, "incomplete pkt");
+               return false;
+       }
+
+       if (pkt_type == 'Q') {
+               query = mbuf_get_string(pkt);
+               if (!query) {
+                       disconnect_client(admin, true, "incomplete query");
+                       return false;
+               }
+               log_debug("got admin query: %s", query);
+
+               if (!admin_parse_query(admin, query))
+                       return false;
+
+               sbuf_prepare_skip(&admin->sbuf, pkt_len);
+               return true;
+       }
+
+       if (pkt_type == 'X') {
+               disconnect_client(admin, false, "close req");
+       } else {
+               admin_error(admin, "unsupported pkt type: %d", pkt_type);
+               disconnect_client(admin, true, "bad pkt");
+       }
+       return false;
+}
+
+/**
+ * Decide whether a not-yet-authenticated client may use the admin
+ * console, and with which privileges.
+ *
+ * - A unix-socket peer running under the pooler's own uid that logs in
+ *   as "pgbouncer" gets both own_user and admin_user.
+ * - A user listed in cf_admin_users gets admin_user.
+ * - A user listed in cf_stats_users is let in without extra flags.
+ * - Everybody else is disconnected and false is returned.
+ */
+bool admin_pre_login(PgSocket *client)
+{
+       const char *username = client->auth_user->name;
+       uid_t peer_uid = 0;
+
+       client->admin_user = 0;
+       client->own_user = 0;
+
+       /* tag same uid as special */
+       if (client->addr.is_unix
+           && get_unix_peer_uid(sbuf_socket(&client->sbuf), &peer_uid)
+           && peer_uid == getuid()
+           && strcmp("pgbouncer", username) == 0)
+       {
+               client->own_user = 1;
+               client->admin_user = 1;
+               slog_info(client, "pgbouncer access from unix socket");
+               return true;
+       }
+
+       if (strlist_contains(cf_admin_users, username)) {
+               client->admin_user = 1;
+               return true;
+       }
+
+       if (strlist_contains(cf_stats_users, username))
+               return true;
+
+       disconnect_client(client, true, "not allowed");
+       return false;
+}
+
+/*
+ * init special database and query parsing
+ *
+ * Registers the fake "pgbouncer" database, its pool and its user,
+ * prepares the canned welcome and startup packets stored in the
+ * database entry, and compiles the command regexes.  Any failure ends
+ * in fatal().
+ */
+void admin_setup(void)
+{
+       PgDatabase *db;
+       PgPool *pool;
+       PgUser *user;
+       PktBuf msg;
+       int res;
+
+       /* fake database */
+       db = add_database("pgbouncer");
+       if (!db)
+               fatal("no mem for admin database");
+
+       db->addr.port = cf_listen_port;
+       db->addr.is_unix = 1;
+       db->pool_size = 2;
+       force_user(db, "pgbouncer", "");
+
+       /* fake pool, tag it as special */
+       pool = get_pool(db, db->forced_user);
+       if (!pool)
+               fatal("cannot create admin pool?");
+       pool->admin = 1;
+       admin_pool = pool;
+
+       /* fake user, with disabled psw */
+       user = add_user("pgbouncer", "");
+       if (!user)
+               fatal("cannot create admin user?");
+       create_auth_cache();
+
+       /* prepare welcome: written straight into db->welcome_msg */
+       pktbuf_static(&msg, db->welcome_msg, sizeof(db->welcome_msg));
+       pktbuf_write_AuthenticationOk(&msg);
+       pktbuf_write_ParameterStatus(&msg, "server_version", "8.0/bouncer");
+       pktbuf_write_ParameterStatus(&msg, "client_encoding", "UNICODE");
+       pktbuf_write_ParameterStatus(&msg, "server_encoding", "UNICODE");
+       pktbuf_write_ParameterStatus(&msg, "is_superuser", "on");
+
+       db->welcome_msg_len = pktbuf_written(&msg);
+       db->welcome_msg_ready = 1;
+
+       /*
+        * startup params: "database" followed by "pgbouncer"; db->dbname
+        * points at the second string inside db->startup_params.
+        */
+       pktbuf_static(&msg, db->startup_params, sizeof(db->startup_params));
+       pktbuf_put_string(&msg, "database");
+       db->dbname = (char *)db->startup_params + pktbuf_written(&msg);
+       pktbuf_put_string(&msg, "pgbouncer");
+       db->startup_params_len = pktbuf_written(&msg);
+
+       /* initialize regexes: extended syntax, case-insensitive */
+       res = regcomp(&rc_show, cmd_show_rx, REG_EXTENDED | REG_ICASE);
+       if (res != 0)
+               fatal("cmd show regex compilation error");
+       res = regcomp(&rc_set_word, cmd_set_word_rx, REG_EXTENDED | REG_ICASE);
+       if (res != 0)
+               fatal("set/word regex compilation error");
+       res = regcomp(&rc_set_str, cmd_set_str_rx, REG_EXTENDED | REG_ICASE);
+       if (res != 0)
+               fatal("set/str regex compilation error");
+       res = regcomp(&rc_single, cmd_single_rx, REG_EXTENDED | REG_ICASE);
+       if (res != 0)
+               fatal("singleword regex compilation error");
+}
+
+/*
+ * Answer the admin clients that are waiting for a PAUSE or SUSPEND to
+ * complete.
+ *
+ * Every active admin client flagged wait_for_response gets the
+ * completion tag matching cf_pause_mode (1: "PAUSE", 2: "SUSPEND") and
+ * has the flag cleared; any other mode is fatal.  Afterwards, if no
+ * admin client is connected any more while the pooler is suspended, the
+ * pooler is resumed automatically.
+ */
+void admin_pause_done(void)
+{
+       List *node, *next;
+       PgSocket *waiter;
+
+       statlist_for_each_safe(node, &admin_pool->active_client_list, next) {
+               waiter = container_of(node, PgSocket, head);
+               if (!waiter->wait_for_response)
+                       continue;
+
+               if (cf_pause_mode == 1)
+                       admin_ready(waiter, "PAUSE");
+               else if (cf_pause_mode == 2)
+                       admin_ready(waiter, "SUSPEND");
+               else
+                       fatal("admin_pause_done: bad state");
+
+               waiter->wait_for_response = 0;
+       }
+
+       if (cf_pause_mode == 2
+                       && statlist_empty(&admin_pool->active_client_list))
+       {
+               log_info("Admin disappeared when suspended, doing RESUME");
+               cf_pause_mode = 0;
+               resume_all();
+       }
+}
+
diff --git a/src/admin.h b/src/admin.h
new file mode 100644 (file)
index 0000000..dcfd14c
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* handle one packet received from an admin console client */
+bool admin_handle_client(PgSocket *client, MBuf *pkt, int pkt_type, int pkt_len);
+/* check whether a connecting client may use the admin console */
+bool admin_pre_login(PgSocket *client);
+/* create the fake "pgbouncer" database and compile the command regexes */
+void admin_setup(void);
+/* log a printf-style error and, if console is not NULL, send it there */
+bool admin_error(PgSocket *console, const char *fmt, ...);
+/* reply to admin clients waiting for PAUSE/SUSPEND to complete */
+void admin_pause_done(void);
+/* append CommandComplete + ReadyForQuery to buf and send it queued */
+void admin_flush(PgSocket *admin, PktBuf *buf, const char *desc);
+/* send CommandComplete + ReadyForQuery immediately */
+bool admin_ready(PgSocket *admin, const char *desc);
diff --git a/src/bouncer.h b/src/bouncer.h
new file mode 100644 (file)
index 0000000..b0af276
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * core structures
+ */
+
+#include "system.h"
+
+#include <event.h>
+
+/* each state corresponds to a list */
+enum SocketState {
+       CL_FREE,                /* free_client_list */
+       CL_LOGIN,               /* login_client_list */
+       CL_WAITING,             /* pool->waiting_client_list */
+       CL_ACTIVE,              /* pool->active_client_list */
+       CL_CANCEL,              /* pool->cancel_req_list */
+
+       SV_FREE,                /* free_server_list */
+       SV_LOGIN,               /* pool->new_server_list */
+       SV_IDLE,                /* pool->idle_server_list */
+       SV_ACTIVE,              /* pool->active_server_list */
+       SV_USED,                /* pool->used_server_list */
+       SV_TESTED               /* pool->tested_server_list */
+};
+
+#define is_server_socket(sk) ((sk)->state >= SV_FREE) /* relies on every SV_* state being declared after all CL_* states */
+
+
+typedef struct PgSocket PgSocket; /* forward declarations: headers included below (e.g. admin.h) use these names in prototypes */
+typedef struct PgUser PgUser;
+typedef struct PgDatabase PgDatabase;
+typedef struct PgPool PgPool;
+typedef struct PgStats PgStats;
+typedef struct PgAddr PgAddr;
+typedef enum SocketState SocketState;
+
+#include "util.h"
+#include "list.h"
+#include "mbuf.h"
+#include "sbuf.h"
+#include "pktbuf.h"
+
+#include "admin.h"
+#include "loader.h"
+#include "client.h"
+#include "server.h"
+#include "pooler.h"
+#include "proto.h"
+#include "objects.h"
+#include "stats.h"
+#include "takeover.h"
+#include "janitor.h"
+
+/* names and passwords live in fixed-size buffers inside the structs, to avoid allocations */
+#define MAX_DBNAME     64
+#define MAX_USERNAME   64
+#define MAX_PASSWORD   64
+
+/* auth modes, should match PostgreSQL's authentication request codes */
+#define AUTH_ANY       -1 /* same as trust but without username check; sent to clients as AUTH_TRUST */
+#define AUTH_TRUST     0
+#define AUTH_PLAIN     3
+#define AUTH_CRYPT     4
+#define AUTH_MD5       5
+#define AUTH_CREDS     6
+
+/* type codes for weird pkts (packets that carry a version/request code instead of a type byte) */
+#define PKT_STARTUP     0x30000 /* 3 << 16, i.e. protocol version 3.0 */
+#define PKT_SSLREQ      80877103 /* (1234 << 16) | 5679 */
+#define PKT_CANCEL      80877102 /* (1234 << 16) | 5678 */
+/* pooling modes; NOTE(review): presumably the possible values of cf_pool_mode - confirm */
+#define POOL_SESSION   0
+#define POOL_TX                1
+#define POOL_STMT      2
+
+struct PgAddr {
+       struct in_addr ip_addr;
+       unsigned short port; /* NOTE(review): byte order (host vs network) is not visible here - confirm */
+       unsigned is_unix:1; /* NOTE(review): presumably set for unix-socket peers, where ip_addr/port are unused - confirm */
+};
+
+struct PgStats {
+       uint64          request_count; /* bumped by handle_client_work() when a client starts a new request */
+       uint64          server_bytes;
+       uint64          client_bytes; /* sum of pkt_len of client packets forwarded to a server */
+       usec_t          query_time;     /* total req time in us */
+};
+
+/* contains connections for one db/user combo */
+struct PgPool {
+       List            head;           /* all pools */
+       List            map_head;       /* pools for specific client/db */
+
+       /* pool contains connection into 'db' under 'user' */
+       PgDatabase *    db;
+       PgUser *        user;
+
+       /* logged-in clients waiting for events (state CL_ACTIVE) */
+       StatList        active_client_list;
+       /* client waits for a server to be available (state CL_WAITING) */
+       StatList        waiting_client_list;
+       /* closed client connections with server key (state CL_CANCEL) */
+       StatList        cancel_req_list;
+
+       /* servers linked with clients (state SV_ACTIVE) */
+       StatList        active_server_list;
+       /* servers ready to be linked with clients (state SV_IDLE) */
+       StatList        idle_server_list;
+       /* server just unlinked from clients (state SV_USED) */
+       StatList        used_server_list;
+       /* server in testing process (state SV_TESTED) */
+       StatList        tested_server_list;
+       /* servers in login phase (state SV_LOGIN) */
+       StatList        new_server_list;
+
+       /* stats */
+       PgStats         stats;
+       PgStats         newer_stats;
+       PgStats         older_stats;
+
+       /* if last connect failed, there should be delay before next */
+       usec_t          last_connect_time;
+       unsigned        last_connect_failed:1;
+       unsigned        admin:1; /* marks the admin console pool; pause/suspend/resume and per-loop maintenance skip it */
+};
+
+#define pool_server_count(pool) ( \
+               statlist_count(&(pool)->active_server_list) + \
+               statlist_count(&(pool)->idle_server_list) + \
+               statlist_count(&(pool)->new_server_list) + \
+               statlist_count(&(pool)->tested_server_list) + \
+               statlist_count(&(pool)->used_server_list)) /* sum over all five per-pool server lists */
+
+#define pool_client_count(pool) ( \
+               statlist_count(&(pool)->active_client_list) + \
+               statlist_count(&(pool)->waiting_client_list)) /* cancel_req_list is not counted */
+
+struct PgUser {
+       List head;
+       List pool_list;
+       char name[MAX_USERNAME];
+       char passwd[MAX_PASSWORD]; /* check_client_passwd() rewrites this in place to its md5 form under AUTH_MD5 when isMD5() says it is not one yet */
+};
+
+struct PgDatabase {
+       List                    head;
+       char                    name[MAX_DBNAME];
+
+       /* database info to be sent to client */
+       uint8                   welcome_msg[512];
+       unsigned                welcome_msg_len;
+       unsigned                welcome_msg_ready:1; /* while unset, per_loop_activate() keeps wait_for_welcome clients waiting */
+
+       /* key/val pairs (without user) for startup msg to be sent to server */
+       uint8                   startup_params[256];
+       unsigned                startup_params_len;
+
+       /* if not NULL, the user/psw is forced; set_pool() requires it when auth_type is AUTH_ANY */
+       PgUser *                forced_user;
+
+       /* address prepared for connect() */
+       PgAddr                  addr;
+
+       /* max server connections in one pool */
+       int                     pool_size;
+
+       /* info fields, pointer to inside startup_msg (NOTE(review): no field of that name exists here; presumably startup_params - confirm) */
+       const char *            dbname;
+};
+
+struct PgSocket {
+       List            head;           /* list header */
+       PgSocket *      link;           /* the dest of packets */
+       PgPool *        pool;           /* parent pool, if NULL not yet assigned */
+
+       SocketState     state; /* each state corresponds to one list, see enum SocketState */
+
+       unsigned        wait_for_welcome:1;     /* no server yet in pool */
+       unsigned        ready:1;                /* server accepts new query */
+       unsigned        flush_req:1;            /* client requested flush */
+       unsigned        admin_user:1;
+       unsigned        own_user:1;             /* is console client with same uid */
+
+       /* if the socket is suspended */
+       unsigned        suspended:1;
+
+       /* admin conn, waits for completion of PAUSE/SUSPEND cmd */
+       unsigned        wait_for_response:1;
+       /* this (server) socket must be closed ASAP */
+       unsigned        close_needed:1;
+
+       usec_t          connect_time;   /* when connection was made */
+       usec_t          request_time;   /* last activity time */
+       usec_t          query_start;    /* query start moment */
+
+       char            salt[4]; /* auth salt: 2 chars plus NUL for crypt, 4 random bytes for md5 */
+       uint8           cancel_key[8];
+       PgUser *        auth_user; /* user the client authenticates as; assigned in set_pool() */
+       PgAddr          addr;
+
+       SBuf            sbuf;           /* stream buffer, must be last */
+};
+
+/* where to store old fd info during SHOW FDS result processing */
+#define tmp_sk_oldfd   request_time /* reuses the usec_t PgSocket.request_time field */
+#define tmp_sk_linkfd  query_start /* reuses the usec_t PgSocket.query_start field */
+/* takeover_clean_socket() needs to clean those up */
+
+/* main.c */
+extern int cf_verbose;
+extern int cf_daemon;
+
+extern char *cf_unix_socket_dir;
+extern char *cf_listen_addr;
+extern int cf_listen_port;
+
+extern int cf_pool_mode;
+extern int cf_max_client_conn; /* checked at login; the "pgbouncer" database is exempt */
+extern int cf_default_pool_size;
+
+extern usec_t cf_server_lifetime;
+extern usec_t cf_server_idle_timeout;
+extern char * cf_server_check_query; /* NULL or empty: launch_recheck() skips the test query */
+extern usec_t cf_server_check_delay; /* when > 0, servers used more recently than this skip the test query */
+extern usec_t cf_server_connect_timeout;
+extern usec_t cf_server_login_retry;
+extern usec_t cf_query_timeout; /* enforced on waiting clients only when > 0 */
+extern usec_t cf_client_idle_timeout; /* enforced on unlinked active clients only when > 0 */
+
+extern int cf_auth_type; /* one of the AUTH_* codes */
+extern char *cf_auth_file;
+
+extern char *cf_logfile;
+extern char *cf_pidfile;
+
+extern char *cf_admin_users;
+extern char *cf_stats_users;
+extern int cf_stats_period;
+
+extern int cf_pause_mode; /* 0 = normal operation, 1 = PAUSE, 2 = SUSPEND (see per_loop_object_maint) */
+extern int cf_shutdown;
+extern int cf_reboot;
+
+extern int cf_sbuf_len;
+extern int cf_tcp_keepalive;
+extern int cf_tcp_keepcnt;
+extern int cf_tcp_keepidle;
+extern int cf_tcp_keepintvl;
+extern int cf_tcp_socket_buffer;
+extern int cf_tcp_defer_accept;
+
+extern ConfElem bouncer_params[];
+
+
+/* take the first entry off the list; gives NULL when statlist_pop() has nothing to return */
+static inline PgSocket *
+pop_socket(StatList *slist)
+{
+       List *node = statlist_pop(slist);
+
+       return node ? container_of(node, PgSocket, head) : NULL;
+}
+
+/* peek at the first socket of the list without unlinking it; NULL for an empty list */
+static inline PgSocket *
+first_socket(StatList *slist)
+{
+       List *front;
+
+       if (!statlist_empty(slist)) {
+               front = slist->head.next;
+               log_debug("first_socket: next=%p", front);
+               return container_of(front, PgSocket, head);
+       }
+
+       log_debug("first_socket: statlist_empty");
+       return NULL;
+}
+
+void load_config(bool reload); /* NOTE(review): defined outside this header; presumably 'reload' is false only for the initial load - confirm */
+
+
diff --git a/src/client.c b/src/client.c
new file mode 100644 (file)
index 0000000..ee55c6c
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Client connection handling
+ */
+
+#include "bouncer.h"
+
+/*
+ * Compare the password sent by the client with the one stored for
+ * client->auth_user, according to cf_auth_type.
+ *
+ * Returns true only on a match.  Empty passwords (sent or stored), a
+ * client without an auth_user and unsupported auth types never match.
+ *
+ * Side effect: under AUTH_MD5 a stored password that isMD5() does not
+ * recognise is replaced in place by its md5 form.
+ */
+static bool check_client_passwd(PgSocket *client, const char *passwd)
+{
+       char md5[MD5_PASSWD_LEN + 1];
+       const char *correct;
+       PgUser *user = client->auth_user;
+
+       /* auth_user is assigned by set_pool() when the startup packet is
+          processed; a password packet arriving earlier must fail the
+          login instead of dereferencing a missing user */
+       if (!user)
+               return false;
+
+       /* disallow empty passwords */
+       if (!*passwd || !*user->passwd)
+               return false;
+
+       switch (cf_auth_type) {
+       case AUTH_PLAIN:
+               return strcmp(user->passwd, passwd) == 0;
+       case AUTH_CRYPT:
+               correct = pg_crypt(user->passwd, (char *)client->salt);
+               /* NOTE(review): assumes pg_crypt() may return NULL on
+                  failure, like crypt(3) - treat that as a mismatch */
+               return correct != NULL && strcmp(correct, passwd) == 0;
+       case AUTH_MD5:
+               if (strlen(passwd) != MD5_PASSWD_LEN)
+                       return false;
+               if (!isMD5(user->passwd))
+                       pg_md5_encrypt(user->passwd, user->name, strlen(user->name), user->passwd);
+               /* skip the 3-char prefix of the stored hash, then hash again with the per-connection salt */
+               pg_md5_encrypt(user->passwd + 3, client->salt, 4, md5);
+               return strcmp(md5, passwd) == 0;
+       }
+       return false;
+}
+
+/*
+ * Resolve database and user for a client and attach it to the
+ * matching pool.  On any failure the client is disconnected and
+ * false is returned.
+ */
+bool
+set_pool(PgSocket *client, const char *dbname, const char *username)
+{
+       PgDatabase *db = find_database(dbname);
+       PgUser *login_user;
+       PgUser *pool_user;
+
+       if (db == NULL) {
+               disconnect_client(client, true, "No such database");
+               return false;
+       }
+
+       if (cf_auth_type != AUTH_ANY) {
+               /* the user clients wants to log in as */
+               login_user = find_user(username);
+               if (login_user == NULL) {
+                       disconnect_client(client, true, "No such user");
+                       return false;
+               }
+       } else if (db->forced_user != NULL) {
+               /* requested user name is ignored in this mode */
+               login_user = db->forced_user;
+       } else {
+               disconnect_client(client, true, "bouncer config error");
+               log_error("auth_type=any requires forced user");
+               return false;
+       }
+       client->auth_user = login_user;
+
+       /* a forced user overrides the login user for pool selection */
+       pool_user = db->forced_user ? db->forced_user : login_user;
+
+       client->pool = get_pool(db, pool_user);
+       if (client->pool == NULL) {
+               disconnect_client(client, true, "no mem for pool");
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Pull "database" and "user" out of the startup packet's key/value
+ * list, apply the connection limit and hand over to set_pool().
+ */
+static bool decide_startup_pool(PgSocket *client, MBuf *pkt)
+{
+       const char *dbname = NULL;
+       const char *username = NULL;
+       const char *key, *val;
+
+       /* list ends at an empty key or when the buffer runs out */
+       for (;;) {
+               key = mbuf_get_string(pkt);
+               if (key == NULL || key[0] == '\0')
+                       break;
+               val = mbuf_get_string(pkt);
+               if (val == NULL)
+                       break;
+
+               if (!strcmp(key, "database"))
+                       dbname = val;
+               else if (!strcmp(key, "user"))
+                       username = val;
+       }
+
+       if (username == NULL) {
+               disconnect_client(client, true, "No username supplied");
+               return false;
+       }
+       if (dbname == NULL) {
+               disconnect_client(client, true, "No database supplied");
+               return false;
+       }
+       slog_debug(client, "login request: db=%s user=%s", dbname, username);
+
+       /* the connection being handled is already included in the
+          count; the admin db is never refused */
+       if (get_active_client_count() > cf_max_client_conn
+           && strcmp(dbname, "pgbouncer") != 0)
+       {
+               disconnect_client(client, true, "no more conns allowed");
+               return false;
+       }
+
+       return set_pool(client, dbname, username);
+}
+
+static const char valid_crypt_salt[] = /* 64 characters, so any value masked with 0x3f is a valid index */
+"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+/*
+ * Send the authentication request ('R') for the configured auth type,
+ * generating a fresh salt in client->salt when the type needs one.
+ * Returns the result reported by SEND_generic().
+ */
+static bool send_client_authreq(PgSocket *client)
+{
+       int auth = cf_auth_type;
+       uint8 saltlen = 0;
+       int res;
+
+       switch (auth) {
+       case AUTH_CRYPT:
+               /* two salt characters from the crypt alphabet, NUL-terminated */
+               saltlen = 2;
+               get_random_bytes((void*)client->salt, saltlen);
+               client->salt[0] = valid_crypt_salt[client->salt[0] & 0x3f];
+               client->salt[1] = valid_crypt_salt[client->salt[1] & 0x3f];
+               client->salt[2] = 0;
+               break;
+       case AUTH_MD5:
+               /* four raw random bytes */
+               saltlen = 4;
+               get_random_bytes((void*)client->salt, saltlen);
+               break;
+       case AUTH_ANY:
+               /* clients are told plain "trust" */
+               auth = AUTH_TRUST;
+               break;
+       default:
+               break;
+       }
+
+       SEND_generic(res, client, 'R', "ib", auth, client->salt, saltlen);
+       return res;
+}
+
+/*
+ * Decide on packets of a client in the login phase.
+ *
+ * Returns true when the packet was consumed (sbuf_prepare_skip() was
+ * issued for it) and false otherwise.
+ *
+ * Compared to the straightforward version this rejects a password
+ * packet that arrives before any startup packet (no auth_user / pool
+ * has been chosen yet, so there is nothing to verify it against) and
+ * does not ignore a failure to send the auth request.
+ */
+static bool handle_client_startup(PgSocket *client, MBuf *pkt)
+{
+       unsigned pkt_type;
+       unsigned pkt_len;
+       const char *passwd;
+
+       SBuf *sbuf = &client->sbuf;
+
+       /* dont tolerate partial packets */
+       if (!get_header(pkt, &pkt_type, &pkt_len)) {
+               disconnect_client(client, true, "client sent bad pkt header");
+               return false;
+       }
+
+       if (client->wait_for_welcome) {
+               if  (finish_client_login(client)) {
+                       /* the packet was already parsed */
+                       sbuf_prepare_skip(sbuf, pkt_len);
+                       return true;
+               } else
+                       return false;
+       }
+
+       slog_noise(client, "pkt='%c' len=%d",
+                  pkt_type < 256 ? pkt_type : '?', pkt_len);
+
+       switch (pkt_type) {
+       case PKT_SSLREQ:
+               log_noise("C: req SSL");
+               log_noise("P: nak");
+               sbuf_answer(&client->sbuf, "N", 1);
+               break;
+       case PKT_STARTUP:
+               if (mbuf_avail(pkt) < pkt_len - 8) {
+                       disconnect_client(client, true, "client sent partial pkt in startup");
+                       return false;
+               }
+               if (client->pool) {
+                       disconnect_client(client, true, "client re-sent startup pkt");
+                       return false;
+               }
+
+               if (!decide_startup_pool(client, pkt))
+                       return false;
+
+               if (client->pool->admin) {
+                       if (!admin_pre_login(client))
+                               return false;
+               }
+
+               if (cf_auth_type <= AUTH_TRUST || client->own_user) {
+                       if (!finish_client_login(client))
+                               return false;
+               } else {
+                       /* without the request the client would wait forever;
+                          as in launch_recheck(), the caller of a failed
+                          send does the disconnect */
+                       if (!send_client_authreq(client)) {
+                               disconnect_client(client, false, "failed to send auth req");
+                               return false;
+                       }
+               }
+               break;
+       case 'p':               /* PasswordMessage */
+               /* too early: the startup packet has not selected a user yet */
+               if (!client->auth_user) {
+                       disconnect_client(client, true, "client password pkt before startup packet");
+                       return false;
+               }
+
+               if (mbuf_avail(pkt) < pkt_len - 5) {
+                       disconnect_client(client, true, "client sent partial pkt in startup");
+                       return false;
+               }
+
+               /* havent requested it */
+               if (cf_auth_type <= AUTH_TRUST) {
+                       disconnect_client(client, true, "unreqested passwd pkt");
+                       return false;
+               }
+
+               passwd = mbuf_get_string(pkt);
+               if (passwd && check_client_passwd(client, passwd)) {
+                       if (!finish_client_login(client))
+                               return false;
+               } else {
+                       disconnect_client(client, true, "Login failed");
+                       return false;
+               }
+               break;
+       case PKT_CANCEL:
+               /* a cancel request never continues as a normal session,
+                  so false is returned in both outcomes */
+               if (mbuf_avail(pkt) == 8) {
+                       const uint8 *key = mbuf_get_bytes(pkt, 8);
+                       memcpy(client->cancel_key, key, 8);
+                       accept_cancel_request(client);
+               } else
+                       disconnect_client(client, false, "bad cancel request");
+               return false;
+       default:
+               disconnect_client(client, false, "bad pkt");
+               return false;
+       }
+       sbuf_prepare_skip(sbuf, pkt_len);
+       client->request_time = get_cached_time();
+       return true;
+}
+
+/* decide on packets of logged-in client: known packet types are forwarded to the linked server (or to the admin console), anything else disconnects the client */
+static bool handle_client_work(PgSocket *client, MBuf *pkt)
+{
+       unsigned pkt_type;
+       unsigned pkt_len;
+       bool flush = 0;
+       SBuf *sbuf = &client->sbuf;
+
+       if (!get_header(pkt, &pkt_type, &pkt_len)) {
+               disconnect_client(client, true, "bad pkt header");
+               return false;
+       }
+       slog_noise(client, "pkt='%c' len=%d", pkt_type, pkt_len);
+
+       switch (pkt_type) {
+
+       /* request immediate response from server */
+       case 'H':               /* Flush */
+               client->flush_req = 1; /* fall through */
+       case 'S':               /* Sync */
+               /* sync is followed by ReadyForQuery */
+
+       /* one-packet queries */
+       case 'Q':               /* Query */
+       case 'F':               /* FunctionCall */
+
+       /* copy end markers */
+       case 'c':               /* CopyDone(F/B) */
+       case 'f':               /* CopyFail(F/B) */
+
+               /* above packets should be sent ASAP */
+               flush = 1; /* fall through */
+
+       /*
+        * extended protocol allows server (and thus pooler)
+        * to buffer packets until sync or flush is sent by client
+        */
+       case 'P':               /* Parse */
+       case 'E':               /* Execute */
+       case 'C':               /* Close */
+       case 'B':               /* Bind */
+       case 'D':               /* Describe */
+       case 'd':               /* CopyData(F/B) */
+
+               /* update stats: a zero query_start means this packet starts a new request */
+               if (!client->query_start) {
+                       client->pool->stats.request_count++;
+                       client->query_start = get_time_usec();
+               }
+
+               if (client->pool->admin)
+                       return admin_handle_client(client, pkt, pkt_type, pkt_len);
+
+               /* acquire server */
+               if (!find_server(client))
+                       return false;
+
+               client->pool->stats.client_bytes += pkt_len;
+
+               /* tag the server as dirty */
+               client->link->ready = 0;
+
+               /* forward the packet */
+               sbuf_prepare_send(sbuf, &client->link->sbuf, pkt_len, flush);
+               break;
+
+       /* any other packet type is treated as a protocol error */
+       default:
+               slog_error(client, "unknown pkt from client: %d/0x%x", pkt_type, pkt_type);
+               disconnect_client(client, true, "unknown pkt");
+               return false;
+       case 'X': /* Terminate: client wants to go away */
+               disconnect_client(client, false, "client close request");
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Callback from SBuf for client sockets.
+ *
+ * 'arg' is the PgSocket of the client.  Read events are dispatched to
+ * the login or work handler depending on the client state; failure
+ * events tear down the client or its linked server.  The handler's
+ * result is returned for read events, false for everything else.
+ */
+bool client_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg)
+{
+       bool res = false;
+       PgSocket *client = arg;
+
+       Assert(!is_server_socket(client));
+       /* the free state of a *client* socket is CL_FREE; checking SV_FREE
+          here could never fire, as the assert above already excludes
+          every server state */
+       Assert(client->state != CL_FREE);
+
+       switch (evtype) {
+       case SBUF_EV_CONNECT_OK:
+       case SBUF_EV_CONNECT_FAILED:
+               /* ^ those should not happen */
+       case SBUF_EV_RECV_FAILED:
+               disconnect_client(client, false, "client unexpected eof");
+               break;
+       case SBUF_EV_SEND_FAILED:
+               disconnect_server(client->link, false, "Server connection closed");
+               break;
+       case SBUF_EV_READ:
+               /* fewer than 5 bytes cannot hold a packet header yet */
+               if (mbuf_avail(pkt) < 5) {
+                       log_noise("C: got partial header, trying to wait a bit");
+                       return false;
+               }
+
+               client->request_time = get_cached_time();
+               switch (client->state) {
+               case CL_LOGIN:
+                       res = handle_client_startup(client, pkt);
+                       break;
+               case CL_ACTIVE:
+                       /* an active client still waiting for the welcome
+                          message is handled by the login code */
+                       if (client->wait_for_welcome)
+                               res = handle_client_startup(client, pkt);
+                       else
+                               res = handle_client_work(client, pkt);
+                       break;
+               case CL_WAITING:
+                       fatal("why waiting client in client_proto()");
+               default:
+                       fatal("bad client state: %d", client->state);
+               }
+       }
+       return res;
+}
+
diff --git a/src/client.h b/src/client.h
new file mode 100644 (file)
index 0000000..136a1a0
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+bool client_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg); /* SBuf callback for client sockets; 'arg' is the client's PgSocket */
+bool set_pool(PgSocket *client, const char *dbname, const char *username); /* picks db/user, sets client->auth_user and client->pool; disconnects the client and returns false on failure */
+
+
diff --git a/src/janitor.c b/src/janitor.c
new file mode 100644 (file)
index 0000000..9d2d554
--- /dev/null
@@ -0,0 +1,445 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Periodic maintenance.
+ */
+
+#include "bouncer.h"
+
+static struct timeval full_maint_period = {0, USEC / 3}; /* {tv_sec, tv_usec}; NOTE(review): a third of a second if USEC is microseconds per second - confirm */
+static struct event full_maint_ev;
+
+/* disconnect every server socket on the given list, passing 'reason' along */
+static void close_server_list(StatList *sk_list, const char *reason)
+{
+       List *cur, *next;
+
+       statlist_for_each_safe(cur, sk_list, next) {
+               disconnect_server(container_of(cur, PgSocket, head), true, reason);
+       }
+}
+
+/*
+ * try to suspend each socket on the list; a socket whose sbuf is not
+ * empty is left running.  returns the number of sockets left running.
+ */
+static int suspend_socket_list(StatList *list)
+{
+       List *cur;
+       PgSocket *sock;
+       int busy = 0;
+
+       statlist_for_each(cur, list) {
+               sock = container_of(cur, PgSocket, head);
+
+               /* already done in an earlier pass */
+               if (sock->suspended)
+                       continue;
+
+               if (!sbuf_empty(&sock->sbuf)) {
+                       busy++;
+                       continue;
+               }
+
+               sbuf_pause(&sock->sbuf);
+               sock->suspended = 1;
+       }
+       return busy;
+}
+
+/* clear the suspended flag and restart the sbuf of every suspended socket on the list */
+static void resume_socket_list(StatList *list)
+{
+       List *cur, *next;
+       PgSocket *sock;
+
+       statlist_for_each_safe(cur, list, next) {
+               sock = container_of(cur, PgSocket, head);
+               if (!sock->suspended)
+                       continue;
+
+               sock->suspended = 0;
+               sbuf_continue(&sock->sbuf);
+       }
+}
+
+/* resume suspended sockets in every pool except the admin one */
+static void resume_sockets(void)
+{
+       List *cur;
+       PgPool *pool;
+
+       statlist_for_each(cur, &pool_list) {
+               pool = container_of(cur, PgPool, head);
+               if (!pool->admin) {
+                       resume_socket_list(&pool->active_client_list);
+                       resume_socket_list(&pool->active_server_list);
+                       resume_socket_list(&pool->idle_server_list);
+                       resume_socket_list(&pool->used_server_list);
+               }
+       }
+}
+
+/* resume pools and listen sockets; used to leave SUSPEND, e.g. when the admin connection disappears */
+void resume_all(void)
+{
+       resume_sockets(); /* suspended client/server sockets of all non-admin pools */
+       resume_pooler(); /* the listen sockets, per the comment above; defined outside this file */
+}
+
+/*
+ * take the first server from the pool's used list and either send the
+ * check query to it (state SV_TESTED) or make it idle right away.
+ * the caller makes sure used_server_list is not empty.
+ */
+static void launch_recheck(PgPool *pool)
+{
+       const char *query = cf_server_check_query;
+       PgSocket *server = first_socket(&pool->used_server_list);
+       bool skip_check;
+       bool res = true;
+
+       if (query == NULL || query[0] == 0) {
+               /* no check query configured */
+               skip_check = true;
+       } else {
+               /* recently used servers are trusted without a check */
+               skip_check = cf_server_check_delay > 0
+                       && get_cached_time() - server->request_time < cf_server_check_delay;
+       }
+
+       if (skip_check) {
+               /* make immidiately available */
+               change_server_state(server, SV_IDLE);
+               return;
+       }
+
+       /* send test query, wait for result */
+       change_server_state(server, SV_TESTED);
+       SEND_generic(res, server, 'Q', "s", query);
+       if (!res)
+               disconnect_server(server, false, "test query failed");
+}
+
+/*
+ * make servers available
+ */
+static void per_loop_activate(PgPool *pool)
+{
+       List *item, *tmp;
+       PgSocket *client;
+
+       /* see if any server have been freed */
+       statlist_for_each_safe(item, &pool->waiting_client_list, tmp) {
+               client = container_of(item, PgSocket, head);
+               if (!statlist_empty(&pool->idle_server_list)) {
+
+                       /* db not fully initialized after reboot */
+                       if (client->wait_for_welcome && !pool->db->welcome_msg_ready) {
+                               launch_new_connection(pool);
+                               continue; /* this client keeps waiting; look at the next one */
+                       }
+
+                       /* there is a ready server already */
+                       activate_client(client);
+               } else if (!statlist_empty(&pool->tested_server_list)) {
+                       /* some connections are in testing process */
+
+                       /* not enough connections? (X) */
+                       launch_new_connection(pool);
+                       break; /* no idle server: stop scanning the waiting clients */
+               } else if (!statlist_empty(&pool->used_server_list)) {
+                       /* ask for more connections to be tested */
+                       launch_recheck(pool);
+
+                       /* not enough connections? (X) */
+                       launch_new_connection(pool);
+                       break;
+               } else {
+                       /* not enough connections */
+                       launch_new_connection(pool);
+                       break;
+               }
+       }
+}
+/*
+ * (X) - there's some problem in light load with small server_check_timeout
+ * where waiting connection wont ever get server connection.
+ */
+
+/*
+ * pause mode work for one pool: close idle, used and still-connecting
+ * servers, then report how many servers are still active or being
+ * tested.  the admin pool is left alone.
+ */
+static int per_loop_pause(PgPool *pool)
+{
+       int busy = 0;
+
+       if (pool->admin)
+               return 0;
+
+       close_server_list(&pool->idle_server_list, "pause mode");
+       close_server_list(&pool->used_server_list, "pause mode");
+       close_server_list(&pool->new_server_list, "pause mode");
+
+       busy += statlist_count(&pool->active_server_list);
+       busy += statlist_count(&pool->tested_server_list);
+       return busy;
+}
+
+/*
+ * suspend mode work for one pool.  clients are handled first; servers
+ * are touched only once no client is busy or waiting.  returns the
+ * number of sockets that are not suspended yet.
+ */
+static int per_loop_suspend(PgPool *pool)
+{
+       int busy;
+
+       if (pool->admin)
+               return 0;
+
+       busy = suspend_socket_list(&pool->active_client_list);
+
+       /* waiting clients still count as busy, but try to get them going */
+       if (!statlist_empty(&pool->waiting_client_list)) {
+               busy += statlist_count(&pool->waiting_client_list);
+               per_loop_activate(pool);
+       }
+
+       if (busy)
+               return busy;
+
+       busy += suspend_socket_list(&pool->active_server_list);
+       busy += suspend_socket_list(&pool->idle_server_list);
+       busy += statlist_count(&pool->tested_server_list);
+
+       /* as all clients are done, no need for them */
+       close_server_list(&pool->used_server_list, "close unsafe fds on suspend");
+
+       return busy;
+}
+
+/*
+ * called once per event loop: runs the activate/pause/suspend step
+ * that matches cf_pause_mode on every non-admin pool, then tells the
+ * admin console when a pause or suspend has nothing active left.
+ */
+void per_loop_object_maint(void)
+{
+       List *cur;
+       PgPool *pool;
+       int busy = 0;
+
+       statlist_for_each(cur, &pool_list) {
+               pool = container_of(cur, PgPool, head);
+               if (pool->admin)
+                       continue;
+
+               switch (cf_pause_mode) {
+               case 0:
+                       per_loop_activate(pool);
+                       break;
+               case 1:
+                       busy += per_loop_pause(pool);
+                       break;
+               case 2:
+                       busy += per_loop_suspend(pool);
+                       break;
+               }
+       }
+
+       /* suspend additionally waits for clients that are still logging in */
+       if (cf_pause_mode == 2)
+               busy += statlist_count(&login_client_list);
+
+       if ((cf_pause_mode == 1 || cf_pause_mode == 2) && !busy)
+               admin_pause_done();
+}
+
+/* per-pool client maintenance: enforce the idle and query timeouts */
+static void pool_client_maint(PgPool *pool)
+{
+       List *item, *tmp;
+       PgSocket *client;
+       usec_t now = get_cached_time();
+       usec_t waited;
+
+       /* client_idle_timeout: only active clients with no link are dropped */
+       if (cf_client_idle_timeout > 0) {
+               statlist_for_each_safe(item, &pool->active_client_list, tmp) {
+                       client = container_of(item, PgSocket, head);
+                       Assert(client->state == CL_ACTIVE);
+                       if (!client->link
+                           && now - client->request_time > cf_client_idle_timeout)
+                               disconnect_client(client, true, "idle_timeout");
+               }
+       }
+
+       /* query_timeout: drop clients that sat in the waiting list too long */
+       if (cf_query_timeout > 0) {
+               statlist_for_each_safe(item, &pool->waiting_client_list, tmp) {
+                       client = container_of(item, PgSocket, head);
+                       Assert(client->state == CL_WAITING);
+                       if (client->query_start != 0) {
+                               waited = now - client->query_start;
+                       } else {
+                               /* no query start recorded, use request_time */
+                               waited = now - client->request_time;
+                               log_warning("query_start==0");
+                       }
+                       if (waited > cf_query_timeout)
+                               disconnect_client(client, true, "query_timeout");
+               }
+       }
+}
+
+/*
+ * Walk one server list and disconnect servers that are flagged
+ * close_needed, idle past cf_server_idle_timeout, older than
+ * cf_server_lifetime, or present while cf_pause_mode is 1.
+ *
+ * When idle_test is set and cf_server_check_query is non-empty, a
+ * surviving server idle longer than cf_server_check_delay is moved to
+ * state SV_USED.
+ */
+static void check_unused_servers(StatList *slist, usec_t now, bool idle_test)
+{
+       List *item, *tmp;
+       PgSocket *server;
+       usec_t since_connect, since_request;
+
+       statlist_for_each_safe(item, slist, tmp) {
+               server = container_of(item, PgSocket, head);
+
+               since_connect = now - server->connect_time;
+               since_request = now - server->request_time;
+
+               if (server->close_needed) {
+                       disconnect_server(server, true, "db conf changed");
+                       continue;
+               }
+               if (cf_server_idle_timeout > 0 && since_request > cf_server_idle_timeout) {
+                       disconnect_server(server, true, "server idle timeout");
+                       continue;
+               }
+               if (cf_server_lifetime > 0 && since_connect > cf_server_lifetime) {
+                       disconnect_server(server, true, "server lifetime over");
+                       continue;
+               }
+               if (cf_pause_mode == 1) {
+                       disconnect_server(server, true, "pause mode");
+                       continue;
+               }
+
+               /* server stays; maybe schedule it for a check */
+               if (!idle_test || !*cf_server_check_query)
+                       continue;
+               if (since_request > cf_server_check_delay)
+                       change_server_state(server, SV_USED);
+       }
+}
+
+/*
+ * Compare the number of server connections with the database's
+ * pool_size and disconnect the surplus, taking victims from the used
+ * list first and the idle list second.  This lets the pooler react
+ * faster when the admin has decreased the pool size.
+ */
+static void check_pool_size(PgPool *pool)
+{
+       PgSocket *victim;
+       int excess;
+
+       /*
+        * new_server_list is left out of the total: a cancel packet may
+        * create a new server connection without taking pool_size into
+        * account.
+        */
+       excess = statlist_count(&pool->active_server_list)
+               + statlist_count(&pool->idle_server_list)
+               + statlist_count(&pool->used_server_list)
+               + statlist_count(&pool->tested_server_list)
+               - pool->db->pool_size;
+
+       Assert(pool->db->pool_size >= 0);
+
+       for (; excess > 0; excess--) {
+               victim = first_socket(&pool->used_server_list);
+               if (!victim)
+                       victim = first_socket(&pool->idle_server_list);
+               if (!victim)
+                       break;
+               disconnect_server(victim, true, "too many servers in pool");
+       }
+}
+
+/* per-pool server maintenance: timeouts first, pool size last */
+static void pool_server_maint(PgPool *pool)
+{
+       List *item, *tmp;
+       PgSocket *server;
+       usec_t now = get_cached_time();
+
+       /* only the idle list gets the idle_test flag */
+       check_unused_servers(&pool->used_server_list, now, 0);
+       check_unused_servers(&pool->tested_server_list, now, 0);
+       check_unused_servers(&pool->idle_server_list, now, 1);
+
+       /* active servers that are not ready within query_timeout */
+       if (cf_query_timeout > 0) {
+               statlist_for_each_safe(item, &pool->active_server_list, tmp) {
+                       server = container_of(item, PgSocket, head);
+                       Assert(server->state == SV_ACTIVE);
+                       /* age is measured from the linked socket's request_time */
+                       if (!server->ready
+                           && now - server->link->request_time > cf_query_timeout)
+                               disconnect_server(server, true, "statement timeout");
+               }
+       }
+
+       /* connections that got connected but could not log in in time */
+       if (cf_server_connect_timeout > 0) {
+               statlist_for_each_safe(item, &pool->new_server_list, tmp) {
+                       server = container_of(item, PgSocket, head);
+                       Assert(server->state == SV_LOGIN);
+
+                       if (now - server->connect_time > cf_server_connect_timeout)
+                               disconnect_server(server, true, "connect timeout");
+               }
+       }
+
+       check_pool_size(pool);
+}
+
+/*
+ * Full-scale maintenance, done only occasionally.  Registered as the
+ * callback of full_maint_ev; the three arguments are not used here.
+ */
+static void do_full_maint(int sock, short flags, void *arg)
+{
+       List *node;
+       PgPool *pool;
+
+       statlist_for_each(node, &pool_list) {
+               pool = container_of(node, PgPool, head);
+               if (!pool->admin) {
+                       pool_server_maint(pool);
+                       pool_client_maint(pool);
+               }
+       }
+
+       /* shutdown requested and no active server connections remain */
+       if (cf_shutdown && get_active_server_count() == 0) {
+               log_info("server connections dropped, exiting");
+               exit(0);
+       }
+
+       loader_users_check();
+
+       /* re-arm the timer for the next round */
+       evtimer_add(&full_maint_ev, &full_maint_period);
+}
+
+/* first-time initialization: arm the full-maintenance timer */
+void janitor_setup(void)
+{
+       /* launch maintenance */
+       evtimer_set(&full_maint_ev, do_full_maint, NULL);
+       evtimer_add(&full_maint_ev, &full_maint_period);
+}
+
+/*
+ * The [pgbouncer] section can be loaded after the databases, so the
+ * database entries get a second pass here: a pool_size that is still
+ * negative is replaced with cf_default_pool_size.
+ */
+void config_postprocess(void)
+{
+       List *node;
+       PgDatabase *db;
+
+       statlist_for_each(node, &database_list) {
+               db = container_of(node, PgDatabase, head);
+               if (db->pool_size >= 0)
+                       continue;
+               db->pool_size = cf_default_pool_size;
+       }
+}
+
diff --git a/src/janitor.h b/src/janitor.h
new file mode 100644 (file)
index 0000000..2c8ccd1
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+void janitor_setup(void);
+void config_postprocess(void);
+void resume_all(void);
+void per_loop_object_maint(void);
+
diff --git a/src/list.h b/src/list.h
new file mode 100644 (file)
index 0000000..8bc2854
--- /dev/null
@@ -0,0 +1,244 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Circular doubly linked list implementation.
+ *
+ * Basic idea from <linux/list.h>.
+ *
+ * <sys/queue.h> seemed usable, but overcomplicated.
+ */
+
+#ifndef __LIST_H_
+#define __LIST_H_
+
+/* turn on slow checking */
+#if defined(CASSERT) && !defined(LIST_DEBUG)
+#define LIST_DEBUG
+#endif
+
+/*
+ * Give offset of a field inside struct.
+ *
+ * Fallback for when no offsetof is defined yet.  The address is cast
+ * to unsigned long rather than unsigned so that the pointer-to-integer
+ * conversion is not narrower than a pointer on LP64 platforms.
+ */
+#ifndef offsetof
+#define offsetof(type, field) ((unsigned long)&(((type *)0)->field))
+#endif
+
+/*
+ * Given pointer to field inside struct, return pointer to struct.
+ * Works by stepping back offsetof(type, field) bytes from ptr.
+ */
+#ifndef container_of
+#define container_of(ptr, type, field) ((type *)((char *)(ptr) - offsetof(type, field)))
+#endif
+
+/*
+ * list type
+ *
+ * The same struct serves as list head and as the link embedded in an
+ * item.  A node whose next/prev point to itself is an empty list (or an
+ * item that is not linked anywhere).
+ */
+typedef struct List List;
+struct List {
+       List *next;
+       List *prev;
+};
+
+/* define a List variable that is statically initialized as empty */
+#define LIST(var) List var = { &var, &var }
+
+/* make the node point at itself: an empty list / unlinked item */
+static inline void list_init(List *list)
+{
+       list->prev = list;
+       list->next = list;
+}
+
+/* true when the node's next pointer leads back to itself */
+static inline bool list_empty(List *list)
+{
+       return list == list->next;
+}
+
+/* link an unlinked item right after the list head; returns the item */
+static inline List *list_prepend(List *item, List *list)
+{
+       List *old_first = list->next;
+
+       /* item must not be on any list */
+       Assert(list_empty(item));
+
+       item->prev = list;
+       item->next = old_first;
+       old_first->prev = item;
+       list->next = item;
+       return item;
+}
+
+/*
+ * Link an unlinked item right before 'list'.  With the list head as
+ * 'list' that is the end of the list.  Returns the item.
+ */
+static inline List *list_append(List *item, List *list)
+{
+       List *old_last = list->prev;
+
+       /* item must not be on any list */
+       Assert(list_empty(item));
+
+       item->prev = old_last;
+       item->next = list;
+       old_last->next = item;
+       list->prev = item;
+       return item;
+}
+
+/* unlink item from its neighbours and leave it self-linked; returns it */
+static inline List *list_del(List *item)
+{
+       List *before = item->prev;
+       List *after = item->next;
+
+       before->next = after;
+       after->prev = before;
+
+       item->prev = item;
+       item->next = item;
+       return item;
+}
+
+/* unlink and return the first item, or NULL when the list is empty */
+static inline List *list_pop(List *list)
+{
+       return list_empty(list) ? NULL : list_del(list->next);
+}
+
+/* return the first item without unlinking it, or NULL on empty list */
+static inline List *list_first(List *list)
+{
+       return list_empty(list) ? NULL : list->next;
+}
+
+/*
+ * remove first elem from list and return with casting
+ *
+ * Yields NULL on empty list, otherwise a 'typ *' computed from the
+ * unlinked node via container_of().  'list' is expanded twice.
+ */
+#define list_pop_type(list, typ, field) \
+       (list_empty(list) ? NULL \
+        : container_of(list_del((list)->next), typ, field))
+
+/*
+ * loop over list
+ *
+ * 'item' visits every node after the head.  The current item must not
+ * be unlinked inside the body, as the step reads (item)->next.
+ */
+#define list_for_each(item, list) \
+       for ((item) = (list)->next; \
+            (item) != (list); \
+            (item) = (item)->next)
+
+/*
+ * loop over list and allow removing item
+ *
+ * 'tmp' always holds the successor of 'item', fetched before the body
+ * runs, so the body may unlink 'item' itself.
+ */
+#define list_for_each_safe(item, list, tmp) \
+       for ((item) = (list)->next, (tmp) = (list)->next->next; \
+            (item) != (list); \
+            (item) = (tmp), (tmp) = (tmp)->next)
+
+/* linear search: is 'item' one of the nodes linked on 'list'? */
+static inline bool item_in_list(List *item, List *list)
+{
+       List *cur;
+
+       for (cur = list->next; cur != list; cur = cur->next) {
+               if (cur == item)
+                       return true;
+       }
+       return false;
+}
+
+
+/*
+ * wrapper for List that keeps track of number of items
+ */
+
+typedef struct StatList StatList;
+struct StatList {
+       List head;              /* the underlying list */
+       int cur_count;          /* number of items currently linked */
+       int max_count;          /* highest cur_count seen since statlist_reset() */
+       const char *name;       /* shown in the LIST_DEBUG sanity-check message */
+};
+
+/* define an empty StatList variable whose name is the variable's name */
+#define STATLIST(var) StatList var = { {&var.head, &var.head}, 0, 0, #var }
+
+/* restart high-water tracking: max_count is set to the current count */
+static inline void statlist_reset(StatList *list)
+{
+       list->max_count = list->cur_count;
+}
+
+/* add item to the start of the list and update the counters */
+static inline void statlist_prepend(List *item, StatList *list)
+{
+       list_prepend(item, &list->head);
+
+       /* bump the count and raise the high-water mark if passed */
+       if (++list->cur_count > list->max_count)
+               list->max_count = list->cur_count;
+}
+
+/* add item to the end of the list and update the counters */
+static inline void statlist_append(List *item, StatList *list)
+{
+       list_append(item, &list->head);
+
+       /* bump the count and raise the high-water mark if passed */
+       if (++list->cur_count > list->max_count)
+               list->max_count = list->cur_count;
+}
+
+/* link item right before node 'pos' and update the counters of 'list' */
+static inline void statlist_put_before(List *item, StatList *list, List *pos)
+{
+       /* appending to 'pos' places the item just ahead of it */
+       list_append(item, pos);
+
+       if (++list->cur_count > list->max_count)
+               list->max_count = list->cur_count;
+}
+
+/* unlink item from the list and decrease the item count */
+static inline void statlist_remove(List *item, StatList *list)
+{
+#ifdef LIST_DEBUG
+       /* slow check that the item really lives on this list */
+       if (!item_in_list(item, &list->head))
+               fatal("item in wrong list, expected: %s", list->name);
+#endif
+
+       list_del(item);
+       --list->cur_count;
+
+       Assert(list->cur_count >= 0);
+}
+
+/* set up an empty list with zeroed counters and the given name */
+static inline void statlist_init(StatList *list, const char *name)
+{
+       list->name = name;
+       list->cur_count = 0;
+       list->max_count = 0;
+       list_init(&list->head);
+}
+
+/* number of items currently on the list */
+static inline int statlist_count(StatList *list)
+{
+       /* a zero (or negative) count is only valid for an empty list */
+       Assert(list->cur_count > 0 || list_empty(&list->head));
+       return list->cur_count;
+}
+
+/* larger of the recorded high-water mark and the current count */
+static inline int statlist_max(StatList *list)
+{
+       if (list->max_count > list->cur_count)
+               return list->max_count;
+       return list->cur_count;
+}
+
+/* unlink and return the first item (NULL if empty), keeping count in sync */
+static inline List *statlist_pop(StatList *list)
+{
+       List *first = list_pop(&list->head);
+
+       if (first != NULL)
+               list->cur_count--;
+
+       Assert(list->cur_count >= 0);
+
+       return first;
+}
+
+/* first item of the list without unlinking it, NULL if empty */
+static inline List *statlist_first(StatList *list)
+{
+       return list_first(&list->head);
+}
+
+/* is the underlying list empty? */
+static inline bool statlist_empty(StatList *list)
+{
+       return list_empty(&list->head);
+}
+
+/* iteration over a StatList: same as the List loops, applied to ->head */
+#define statlist_for_each(item, list) list_for_each(item, &((list)->head))
+#define statlist_for_each_safe(item, list, tmp) list_for_each_safe(item, &((list)->head), tmp)
+
+#endif /* __LIST_H_ */
+
diff --git a/src/loader.c b/src/loader.c
new file mode 100644 (file)
index 0000000..1e31b2c
--- /dev/null
@@ -0,0 +1,529 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Config and pg_auth file reading.
+ */
+
+#include "bouncer.h"
+
+/*
+ * ConnString parsing
+ */
+
+/*
+ * Get the next key=val pair from a connstring.
+ *
+ * Skips leading spaces, then reports where the key and the value start
+ * and how long they are.  Nothing is written into the string itself.
+ * A key ends at '=', space or end of string; a value ends at space or
+ * end of string.  Returns the position after the trailing spaces.
+ */
+static char * getpair(char *p,
+                     char **key_p, int *key_len,
+                     char **val_p, int *val_len)
+{
+       char *start;
+
+       /* leading spaces */
+       for (; *p == ' '; p++)
+               ;
+
+       /* key */
+       for (start = p; *p && *p != '=' && *p != ' '; p++)
+               ;
+       *key_p = start;
+       *key_len = p - start;
+
+       if (*p == '=')
+               p++;
+
+       /* value */
+       for (start = p; *p && *p != ' '; p++)
+               ;
+       *val_p = start;
+       *val_len = p - start;
+
+       /* trailing spaces */
+       for (; *p == ' '; p++)
+               ;
+       return p;
+}
+
+/*
+ * fill PgDatabase from connstr
+ *
+ * name    - database name as seen by clients; also the default for the
+ *           server-side dbname
+ * connstr - space-separated key=val pairs; it is modified in place
+ *           (pairs get NUL-terminated)
+ *
+ * Recognized keys: dbname, host, port, user, password, client_encoding,
+ * datestyle, pool_size.  Any other key makes the whole database entry
+ * be skipped with an error in the log.
+ */
+void parse_database(char *name, char *connstr)
+{
+       char *p, *key, *val;
+       int klen, vlen;
+       PktBuf buf;
+       PgDatabase *db;
+       int pool_size = -1;
+
+       char *dbname = name;
+       char *host = NULL;
+       char *port = "5432";
+       char *username = NULL;
+       char *password = "";
+       char *client_encoding = NULL;
+       char *datestyle = NULL;
+
+       in_addr_t v_addr = INADDR_NONE;
+       int v_port;
+
+       p = connstr;
+       while (*p) {
+               p = getpair(p, &key, &klen, &val, &vlen);
+               /* an empty key or value silently ends parsing; earlier pairs are kept */
+               if (*key == 0 || *val == 0 || klen == 0 || vlen == 0)
+                       break;
+               /* terminate both parts in place; p is already past them */
+               key[klen] = 0;
+               val[vlen] = 0;
+
+               if (strcmp("dbname", key) == 0)
+                       dbname = val;
+               else if (strcmp("host", key) == 0)
+                       host = val;
+               else if (strcmp("port", key) == 0)
+                       port = val;
+               else if (strcmp("user", key) == 0)
+                       username = val;
+               else if (strcmp("password", key) == 0)
+                       password = val;
+               else if (strcmp("client_encoding", key) == 0)
+                       client_encoding = val;
+               else if (strcmp("datestyle", key) == 0)
+                       datestyle = val;
+               else if (strcmp("pool_size", key) == 0)
+                       pool_size = atoi(val);
+               else {
+                       log_error("skipping database %s because"
+                                 " of bad connstring: %s", name, connstr);
+                       return;
+               }
+       }
+
+       /* no host means unix socket, which needs cf_unix_socket_dir */
+       if (!host) {
+               if (!cf_unix_socket_dir) {
+                       log_error("skipping database %s because"
+                               " unix socket not configured", name);
+                       return;
+               }
+       } else {
+               /* inet_addr() takes numeric addresses only, no name lookup */
+               v_addr = inet_addr(host);
+               if (v_addr == INADDR_NONE) {
+                       log_error("skipping database %s because"
+                                       " of bad host: %s", name, host);
+                       return;
+               }
+       }
+       /* atoi() gives 0 for non-numeric text; no upper bound is checked */
+       v_port = atoi(port);
+       if (v_port == 0) {
+               log_error("skipping database %s because"
+                         " of bad port: %s", name, port);
+               return;
+       }
+
+       db = add_database(name);
+       if (!db) {
+               log_error("cannot create database, no mem?");
+               return;
+       }
+
+       /*
+        * db->dbname already set: compare the old settings with the new
+        * ones and tag the database dirty if dbname, address, port or
+        * forced user differ.
+        */
+       if (db->dbname) {
+               bool changed = false;
+               if (strcmp(db->dbname, dbname) != 0)
+                       changed = true;
+               else if (host && db->addr.is_unix)
+                       changed = true;
+               else if (!host && !db->addr.is_unix)
+                       changed = true;
+               else if (host && v_addr != db->addr.ip_addr.s_addr)
+                       changed = true;
+               else if (v_port != db->addr.port)
+                       changed = true;
+               else if (username && !db->forced_user)
+                       changed = true;
+               else if (username && strcmp(username, db->forced_user->name))
+                       changed = true;
+               else if (!username && db->forced_user)
+                       changed = true;
+
+               if (changed)
+                       tag_database_dirty(db);
+       }
+
+       /* if pool_size < 0 it will be set later */
+       db->pool_size = pool_size;
+       db->addr.port = v_port;
+       db->addr.ip_addr.s_addr = v_addr;
+       db->addr.is_unix = host ? 0 : 1;
+
+       /*
+        * Build the startup parameters inside db->startup_params.
+        * NOTE(review): no overflow check is visible here; presumably
+        * pktbuf_static() bounds the writes - confirm in pktbuf.c.
+        */
+       pktbuf_static(&buf, db->startup_params, sizeof(db->startup_params));
+
+       pktbuf_put_string(&buf, "database");
+       /* db->dbname points into startup_params, at the value written next */
+       db->dbname = (char *)db->startup_params + pktbuf_written(&buf);
+       pktbuf_put_string(&buf, dbname);
+
+       if (client_encoding) {
+               pktbuf_put_string(&buf, "client_encoding");
+               pktbuf_put_string(&buf, client_encoding);
+       }
+
+       if (datestyle) {
+               pktbuf_put_string(&buf, "datestyle");
+               pktbuf_put_string(&buf, datestyle);
+       }
+
+       db->startup_params_len = pktbuf_written(&buf);
+
+       /* if user is forced, create fake object for it */
+       if (username != NULL) {
+               if (!force_user(db, username, password))
+                       log_warning("db setup failed, trying to continue");
+       } else if (db->forced_user)
+               log_warning("losing forced user not supported,"
+                           " keeping old setting");
+}
+
+/*
+ * User file parsing
+ */
+
+/*
+ * Find next " in string, skipping escaped ones.
+ *
+ * A backslash followed by any character is stepped over as a pair.
+ * Returns a pointer to the quote, to the terminating NUL, or to a
+ * backslash that is the very last character of the string.
+ */
+static char *find_quote(char *p)
+{
+       for (;;) {
+               while (*p && *p != '\\' && *p != '"')
+                       p++;
+
+               /* anything but a complete escape pair ends the search */
+               if (*p != '\\' || !p[1])
+                       return p;
+
+               p += 2;
+       }
+}
+
+/*
+ * Copy src to dst, removing backslash escapes on the way.
+ *
+ * dst - destination buffer
+ * src - NUL-terminated source; a backslash makes the following
+ *       character be taken literally (so \\ gives \ and \" gives "),
+ *       matching how find_quote() steps over escape pairs
+ * len - size of dst; at most len - 1 characters are copied and the
+ *       result is always NUL-terminated.  Nothing is written when
+ *       len <= 0.
+ *
+ * A single backslash at the very end of src is dropped.
+ */
+static void copy_quoted(char *dst, const char *src, int len)
+{
+       char *end;
+
+       /* no room even for the terminator */
+       if (len <= 0)
+               return;
+
+       end = dst + len - 1;
+       while (*src && dst < end) {
+               if (*src == '\\') {
+                       /* skip the escape char, keep what it protects */
+                       src++;
+                       if (!*src)
+                               break;
+               }
+               *dst++ = *src++;
+       }
+       *dst = 0;
+}
+
+/*
+ * Unquote a username/password pair taken from the auth file and
+ * register it with add_user().  Failure is only logged.
+ */
+static void unquote_add_user(const char *username, const char *password)
+{
+       char plain_user[MAX_USERNAME];
+       char plain_passwd[MAX_PASSWORD];
+
+       copy_quoted(plain_user, username, sizeof(plain_user));
+       copy_quoted(plain_passwd, password, sizeof(plain_passwd));
+
+       if (!add_user(plain_user, plain_passwd))
+               log_warning("cannot create user, no mem");
+}
+
+/*
+ * Tell whether file 'fn' is unchanged since the previous call.
+ *
+ * The stat() result of the last call is remembered in a static cache.
+ * Returns true when device, inode, mode, owner, group, mtime and size
+ * all match the remembered values.  Otherwise the cache is refreshed
+ * and false is returned.  A failing stat() also gives false and leaves
+ * the cache untouched.
+ *
+ * Calling with fn == NULL clears the cache and returns false.
+ */
+static bool auth_loaded(const char *fn)
+{
+       static struct stat cache;
+       struct stat cur;
+
+       /* hack for resetting */
+       if (fn == NULL) {
+               memset(&cache, 0, sizeof(cache));
+               return false;
+       }
+
+       if (stat(fn, &cur) < 0)
+               return false;
+
+       /* owner is compared with owner and group with group */
+       if (cache.st_dev == cur.st_dev
+       && cache.st_ino == cur.st_ino
+       && cache.st_mode == cur.st_mode
+       && cache.st_uid == cur.st_uid
+       && cache.st_gid == cur.st_gid
+       && cache.st_mtime == cur.st_mtime
+       && cache.st_size == cur.st_size)
+               return true;
+       cache = cur;
+       return false;
+}
+
+/*
+ * Reload cf_auth_file unless auth_loaded() reports it unchanged.
+ * Returns true when no reload was needed, else load_auth_file()'s result.
+ */
+bool loader_users_check(void)
+{
+       if (!auth_loaded(cf_auth_file))
+               return load_auth_file(cf_auth_file);
+
+       return true;
+}
+
+/*
+ * Load list of users from pg_auth/pg_psw file.
+ *
+ * Each line is expected to start with "username" "password"; both are
+ * double-quoted, backslash escapes a character, and the rest of the
+ * line is ignored.  Every pair goes to unquote_add_user().
+ *
+ * Returns false only when the file cannot be loaded, in which case the
+ * auth_loaded() cache is reset as well.  A syntax error or an over-long
+ * field stops parsing with an error in the log, but users parsed so far
+ * are kept and true is still returned.
+ */
+bool load_auth_file(const char *fn)
+{
+       char *user, *password, *buf, *p;
+
+       buf = load_file(fn);
+       if (buf == NULL) {
+               /* reset file info */
+               auth_loaded(NULL);
+               return false;
+       }
+
+       p = buf;
+       while (*p) {
+               /*
+                * skip whitespace and empty lines; the cast keeps bytes
+                * above 0x7f from reaching isspace() as negative values
+                */
+               while (*p && isspace((unsigned char)*p)) p++;
+               if (!*p)
+                       break;
+
+               /* start of line */
+               if (*p != '"') {
+                       log_error("broken auth file");
+                       break;
+               }
+               user = ++p;
+               p = find_quote(p);
+               if (*p != '"') {
+                       log_error("broken auth file");
+                       break;
+               }
+               if (p - user >= MAX_USERNAME) {
+                       log_error("too long username");
+                       break;
+               }
+               *p++ = 0; /* tag username end */
+
+               /* get password: anything before its opening quote is skipped */
+               p = find_quote(p);
+               if (*p != '"') {
+                       log_error("broken auth file");
+                       break;
+               }
+               password = ++p;
+               p = find_quote(p);
+               if (*p != '"') {
+                       log_error("broken auth file");
+                       break;
+               }
+               if (p - password >= MAX_PASSWORD) {
+                       log_error("too long password");
+                       break;
+               }
+               *p++ = 0; /* tag password end */
+
+               /* send them away */
+               unquote_add_user(user, password);
+
+               /* skip rest of the line */
+               while (*p && *p != '\n') p++;
+       }
+       free(buf);
+
+       create_auth_cache();
+
+       return true;
+}
+
+/*
+ * INI file parser
+ */
+
+/*
+ * Setter for integer config values.  The value must start with a digit;
+ * otherwise an error is reported via admin_error() and false returned.
+ */
+bool cf_set_int(ConfElem *elem, const char *val, PgSocket *console)
+{
+       int *int_p = elem->dst;
+
+       if (*val >= '0' && *val <= '9') {
+               *int_p = atoi(val);
+               return true;
+       }
+
+       admin_error(console, "bad value: %s", val);
+       return false;
+}
+
+/*
+ * Getter for integer config values.  The text is formatted into a
+ * static buffer that the next call overwrites.
+ */
+const char *cf_get_int(ConfElem *elem)
+{
+       static char numbuf[32];
+       int *int_p = elem->dst;
+
+       sprintf(numbuf, "%d", *int_p);
+       return numbuf;
+}
+/*
+ * Setter for time config values.  The text is a number that gets
+ * multiplied by USEC before being stored; it must start with a digit.
+ */
+bool cf_set_time(ConfElem *elem, const char *val, PgSocket *console)
+{
+       usec_t *time_p = elem->dst;
+
+       if (*val >= '0' && *val <= '9') {
+               *time_p = USEC * (usec_t)atoi(val);
+               return true;
+       }
+
+       admin_error(console, "bad value: %s", val);
+       return false;
+}
+
+/*
+ * Getter for time config values: the stored value divided by USEC,
+ * formatted into a static buffer that the next call overwrites.
+ */
+const char *cf_get_time(ConfElem *elem)
+{
+       static char numbuf[32];
+       usec_t *time_p = elem->dst;
+
+       sprintf(numbuf, "%d", (int)(*time_p / USEC));
+       return numbuf;
+}
+
+/*
+ * Setter for string config values.
+ *
+ * elem->dst points to a 'char *' variable.  If the new value equals
+ * the current one nothing happens.  Otherwise the value is duplicated,
+ * the old string is freed if an earlier call allocated it, and the
+ * element is marked as allocated.
+ *
+ * Returns false when memory for the copy cannot be obtained; the old
+ * value then stays in place and remains valid.  'console' is not used.
+ */
+bool cf_set_str(ConfElem *elem, const char *val, PgSocket *console)
+{
+       char **str_p = elem->dst;
+       char *tmp;
+
+       /* dont touch if not changed */
+       if (*str_p && strcmp(*str_p, val) == 0)
+               return true;
+
+       /* copy first, so a failed strdup() cannot leave a freed pointer */
+       tmp = strdup(val);
+       if (!tmp)
+               return false;
+
+       /* if dynamically allocated, free it */
+       if (elem->allocated)
+               free(*str_p);
+
+       *str_p = tmp;
+       elem->allocated = true;
+       return true;
+}
+
+/* getter for string config values: returns the stored pointer as is */
+const char * cf_get_str(ConfElem *elem)
+{
+       char **str_p = elem->dst;
+
+       return *str_p;
+}
+
+/*
+ * Look up 'key' (case-insensitively) in elem_list and pass 'val' to the
+ * element's setter.
+ *
+ * The list ends at the entry whose name is NULL.  With 'reload' set,
+ * elements not marked reloadable are refused.  Returns the setter's
+ * result, or false for refused and unknown parameters.
+ */
+bool set_config_param(ConfElem *elem_list,
+                     const char *key, const char *val,
+                     bool reload, PgSocket *console)
+{
+       ConfElem *desc = elem_list;
+
+       /* find the matching element */
+       while (desc->name && strcasecmp(key, desc->name) != 0)
+               desc++;
+
+       if (!desc->name) {
+               admin_error(console, "unknown config parameter: %s", key);
+               return false;
+       }
+
+       /* if reload not allowed, skip it */
+       if (reload && !desc->reloadable) {
+               if (console)
+                       admin_error(console,
+                               "%s cannot be changed online", key);
+               return false;
+       }
+
+       /* got config, parse it */
+       return desc->io.fn_set(desc, val, console);
+}
+
+/*
+ * Deliver one key/value pair to a section: to its data callback when
+ * it has one, otherwise to set_config_param() with the section's
+ * element list.  Pairs outside any known section are dropped.
+ */
+static void map_config(ConfSection *sect, char *key, char *val, bool reload)
+{
+       if (!sect)
+               return;
+
+       if (!sect->data_fn) {
+               set_config_param(sect->elem_list, key, val, reload, NULL);
+               return;
+       }
+
+       sect->data_fn(key, val);
+}
+
+/* current value of a config element as text, via the element's getter */
+const char *conf_to_text(ConfElem *elem)
+{
+       return elem->io.fn_get(elem);
+}
+
+/*
+ * Case-insensitive lookup of a section by name.  The array ends at the
+ * entry whose name is NULL.  Unknown names are logged and give NULL.
+ */
+static ConfSection *find_section(ConfSection *sect, const char *name)
+{
+       while (sect->name) {
+               if (strcasecmp(sect->name, name) == 0)
+                       return sect;
+               sect++;
+       }
+
+       log_warning("unknown section in config: %s", name);
+       return NULL;
+}
+
+/*
+ * Parse an INI-style config file and hand each key/value pair to the
+ * section it appears under.
+ *
+ * fn        - file to read
+ * sect_list - known sections; the array ends at the entry with NULL name
+ * reload    - passed on to the setters; also decides what happens when
+ *             the file cannot be loaded: exit(1) on first load, plain
+ *             return on reload
+ *
+ * Lines starting with '#' or ';' are comments, "[name]" starts a
+ * section, and everything else must be "key = value" with a key made
+ * of alphanumerics and '_'.  A line without '=' stops the parsing with
+ * an error in the log.  Pairs in unknown sections are dropped.
+ *
+ * Characters go through an unsigned char cast before reaching the
+ * <ctype.h> functions, so bytes above 0x7f are never passed as
+ * negative values.
+ */
+void iniparser(const char *fn, ConfSection *sect_list, bool reload)
+{
+       char *buf;
+       char *p, *key, *val;
+       int klen, vlen;
+       ConfSection *cur_section = NULL;
+
+       buf = load_file(fn);
+       if (buf == NULL) {
+               if (!reload)
+                       exit(1);
+               else
+                       return;
+       }
+
+       p = buf;
+       while (*p) {
+               /* space at the start of line - including empty lines */
+               while (*p && isspace((unsigned char)*p)) p++;
+
+               /* skip comment lines */
+               if (*p == '#' || *p == ';') {
+                       while (*p && *p != '\n') p++;
+                       continue;
+               }
+               /* got new section */
+               if (*p == '[') {
+                       key = ++p;
+                       while (*p && *p != ']' && *p != '\n') p++;
+                       if (*p != ']') {
+                               log_warning("bad section header");
+                               cur_section = NULL;
+                               continue;
+                       }
+                       *p++ = 0;
+
+                       cur_section = find_section(sect_list, key);
+                       continue;
+               }
+
+               /* done? */
+               if (*p == 0) break;
+
+               /* read key val */
+               key = p;
+               while (*p && (isalnum((unsigned char)*p) || *p == '_')) p++;
+               klen = p - key;
+
+               /* expect '=', skip it */
+               while (*p && (*p == ' ' || *p == '\t')) p++;
+               if (*p != '=') {
+                       log_error("syntax error in config, stopping loading");
+                       break;
+               } else
+                       p++;
+               while (*p && (*p == ' ' || *p == '\t')) p++;
+
+               /* now read value: everything up to the end of line */
+               val = p;
+               while (*p && (*p != '\n'))
+                       p++;
+               vlen = p - val;
+               /* eat space at end */
+               while (vlen > 0 && isspace((unsigned char)val[vlen - 1]))
+                       vlen--;
+
+               /* skip junk */
+               while (*p && isspace((unsigned char)*p)) p++;
+
+               /* our buf is r/w, so terminate key and value in place */
+               key[klen] = 0;
+               val[vlen] = 0;
+               map_config(cur_section, key, val, reload);
+       }
+
+       free(buf);
+}
+
diff --git a/src/loader.h b/src/loader.h
new file mode 100644 (file)
index 0000000..957d883
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* configuration parsing */
+
+/*
+ * ConfAccess initializers ({getter, setter}) for the built-in value
+ * types; meant to fill the 'io' member of a ConfElem table entry.
+ */
+#define CF_INT         {cf_get_int, cf_set_int}
+#define CF_STR         {cf_get_str, cf_set_str}
+#define CF_TIME                {cf_get_time, cf_set_time}
+
+#define CF_SECT_VARS   1       /* section contains pre-defined key-value pairs */
+#define CF_SECT_DATA   2       /* key-val pairs are data */
+
+typedef struct ConfElem ConfElem;
+
+/* callback for CF_SECT_DATA loading */
+typedef void (*conf_data_callback_fn)(char *key, char *value);
+
+/* getter: returns the textual form of the element's current value */
+typedef const char * (*conf_var_get_fn)(ConfElem *elem);
+/* setter: parses 'value' into the element; returns false when rejected */
+typedef bool (*conf_var_set_fn)(ConfElem *elem, const char *value, PgSocket *console);
+
+/* getter/setter pair attached to one config element */
+typedef struct {
+       conf_var_get_fn fn_get;
+       conf_var_set_fn fn_set;
+} ConfAccess;
+
+/* description of one named config variable */
+struct ConfElem {
+       /* key name; a NULL name terminates an element table */
+       const char *name;
+       /* NOTE(review): presumably "may be changed on config reload" - confirm in set_config_param() */
+       bool reloadable;
+       /* accessors used to read and write the value */
+       ConfAccess io;
+       /* address of the variable backing this element (may be left unset for custom accessors) */
+       void *dst;
+       /* NOTE(review): presumably tracks whether *dst holds loader-owned heap memory - confirm in loader.c */
+       bool allocated;
+};
+
+/*
+ * one [section] of the ini file: it carries either a table of known
+ * variables (elem_list) or a callback receiving raw key/value pairs
+ * (data_fn).  A NULL name terminates a section table.
+ */
+typedef struct ConfSection {
+       const char *name;
+       ConfElem *elem_list;
+       conf_data_callback_fn data_fn;
+} ConfSection;
+
+/* parse ini file 'fn', dispatching its key/value pairs to the sections in sect_list */
+void iniparser(const char *fn, ConfSection *sect_list, bool reload);
+
+/* accessors behind CF_INT */
+const char * cf_get_int(ConfElem *elem);
+bool cf_set_int(ConfElem *elem, const char *value, PgSocket *console);
+
+/* accessors behind CF_TIME */
+const char * cf_get_time(ConfElem *elem);
+bool cf_set_time(ConfElem *elem, const char *value, PgSocket *console);
+
+/* accessors behind CF_STR */
+const char *cf_get_str(ConfElem *elem);
+bool cf_set_str(ConfElem *elem, const char *value, PgSocket *console);
+
+const char *conf_to_text(ConfElem *elem);
+bool set_config_param(ConfElem *elem_list, const char *key, const char *val, bool reload, PgSocket *console);
+
+/* connstring parsing */
+void parse_database(char *name, char *connstr);
+
+/* user file parsing */
+bool load_auth_file(const char *fn);
+bool loader_users_check(void);
+
diff --git a/src/main.c b/src/main.c
new file mode 100644 (file)
index 0000000..7841c53
--- /dev/null
@@ -0,0 +1,459 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Launcher for all the rest.
+ */
+
+#include "bouncer.h"
+
+#include <signal.h>
+#include <getopt.h>
+
+static bool set_mode(ConfElem *elem, const char *val, PgSocket *console);
+static const char *get_mode(ConfElem *elem);
+static bool set_auth(ConfElem *elem, const char *val, PgSocket *console);
+static const char *get_auth(ConfElem *elem);
+
+static const char *usage_str =
+"usage: pgbouncer [-d] [-v] [-h|-V] config.ini\n";
+
+/*
+ * Print the usage line to stdout and terminate the process.
+ *
+ * err - exit status handed to exit().
+ */
+static void usage(int err)
+{
+       /* usage_str is data, not a format: never pass it as the format argument */
+       printf("%s", usage_str);
+       exit(err);
+}
+
+/*
+ * configuration storage
+ *
+ * Compiled-in defaults.  Most of these variables are referenced from
+ * bouncer_params[] and so can be set from the config file; the first
+ * few are driven by the command line and by signal handlers instead.
+ */
+
+/* incremented once per -v on the command line */
+int cf_verbose = 0;
+/* set by -d: daemon_setup() calls go_daemon() */
+int cf_daemon = 0;
+/* 0 = running, 1 = PAUSE, 2 = SUSPEND (see handle_sigusr1/handle_sigusr2) */
+int cf_pause_mode = 0;
+/* set by the SIGINT handler */
+int cf_shutdown = 0;
+/* set by -R: main() runs takeover_init() and loops until this is cleared */
+int cf_reboot = 0;
+/* config file path taken from the command line */
+static char *cf_config_file;
+
+char *cf_listen_addr = NULL;
+int cf_listen_port = 6000;
+char *cf_unix_socket_dir = "/tmp";
+
+int cf_pool_mode = POOL_SESSION;
+
+/* sbuf config */
+/* exposed in the config file under the name "pkt_buf" */
+int cf_sbuf_len = 2048;
+int cf_tcp_socket_buffer = 0;
+/* non-zero default only where the platform defines TCP_DEFER_ACCEPT */
+#ifdef TCP_DEFER_ACCEPT
+int cf_tcp_defer_accept = 45;
+#else
+int cf_tcp_defer_accept = 0;
+#endif
+int cf_tcp_keepalive = 0;
+int cf_tcp_keepcnt = 0;
+int cf_tcp_keepidle = 0;
+int cf_tcp_keepintvl = 0;
+
+int cf_auth_type = AUTH_MD5;
+char *cf_auth_file = "unconfigured_file";
+
+int cf_max_client_conn = 20;
+int cf_default_pool_size = 10;
+
+char *cf_server_check_query = "select 1";
+usec_t cf_server_check_delay = 30 * USEC;
+
+/* time values below are stored as multiples of USEC */
+/* NOTE(review): a value of 0 presumably disables the timeout - confirm in janitor.c */
+usec_t cf_server_lifetime = 60*60*USEC;
+usec_t cf_server_idle_timeout = 10*60*USEC;
+usec_t cf_server_connect_timeout = 15*USEC;
+usec_t cf_server_login_retry = 15*USEC;
+usec_t cf_query_timeout = 0*USEC;
+usec_t cf_client_idle_timeout = 0*USEC;
+
+char *cf_logfile = NULL;
+/* when NULL, pidfile handling is skipped and daemon mode is refused */
+char *cf_pidfile = NULL;
+static char *cf_jobname = NULL;
+
+char *cf_admin_users = "";
+char *cf_stats_users = "";
+int cf_stats_period = 60;
+
+
+/*
+ * config file description
+ *
+ * Each bouncer_params[] row is a ConfElem initializer:
+ *   { key name, reloadable flag, {getter, setter}, address of variable }
+ * "auth_type" and "pool_mode" give no address: their custom accessors
+ * work on cf_auth_type / cf_pool_mode directly.
+ * The {NULL} row terminates the table.
+ */
+ConfElem bouncer_params[] = {
+{"job_name",           true, CF_STR, &cf_jobname},
+{"conffile",           true, CF_STR, &cf_config_file},
+{"logfile",            true, CF_STR, &cf_logfile},
+{"pidfile",            false, CF_STR, &cf_pidfile},
+{"listen_addr",                false, CF_STR, &cf_listen_addr},
+{"listen_port",                false, CF_INT, &cf_listen_port},
+{"unix_socket_dir",    false, CF_STR, &cf_unix_socket_dir},
+{"auth_type",          true, {get_auth, set_auth}},
+{"auth_file",          true, CF_STR, &cf_auth_file},
+{"pool_mode",          true, {get_mode, set_mode}},
+{"max_client_conn",    true, CF_INT, &cf_max_client_conn},
+{"default_pool_size",  true, CF_INT, &cf_default_pool_size},
+
+{"server_check_query", true, CF_STR, &cf_server_check_query},
+{"server_check_delay", true, CF_TIME, &cf_server_check_delay},
+{"query_timeout",      true, CF_TIME, &cf_query_timeout},
+{"client_idle_timeout",        true, CF_TIME, &cf_client_idle_timeout},
+{"server_lifetime",    true, CF_TIME, &cf_server_lifetime},
+{"server_idle_timeout",        true, CF_TIME, &cf_server_idle_timeout},
+{"server_connect_timeout",true, CF_TIME, &cf_server_connect_timeout},
+{"server_login_retry", true, CF_TIME, &cf_server_login_retry},
+
+{"pkt_buf",            false, CF_INT, &cf_sbuf_len},
+{"tcp_defer_accept",   false, CF_INT, &cf_tcp_defer_accept},
+{"tcp_socket_buffer",  true, CF_INT, &cf_tcp_socket_buffer},
+{"tcp_keepalive",      true, CF_INT, &cf_tcp_keepalive},
+{"tcp_keepcnt",                true, CF_INT, &cf_tcp_keepcnt},
+{"tcp_keepidle",       true, CF_INT, &cf_tcp_keepidle},
+{"tcp_keepintvl",      true, CF_INT, &cf_tcp_keepintvl},
+{"verbose",            true, CF_INT, &cf_verbose},
+{"admin_users",                true, CF_STR, &cf_admin_users},
+{"stats_users",                true, CF_STR, &cf_stats_users},
+{"stats_period",       true, CF_INT, &cf_stats_period},
+{NULL},
+};
+
+/*
+ * Sections of the ini file, as { name, variable table, data callback }:
+ * [pgbouncer] maps onto bouncer_params[], while every key/value pair
+ * under [databases] is handed to parse_database().
+ */
+static ConfSection bouncer_config [] = {
+{"pgbouncer", bouncer_params, NULL},
+{"databases", NULL, parse_database},
+{NULL}
+};
+
+/*
+ * Getter for "pool_mode": textual name of the current cf_pool_mode.
+ * 'elem' is not used.  An unknown mode is reported through fatal().
+ */
+static const char *get_mode(ConfElem *elem)
+{
+       const char *name = NULL;
+
+       if (cf_pool_mode == POOL_STMT)
+               name = "statement";
+       else if (cf_pool_mode == POOL_TX)
+               name = "transaction";
+       else if (cf_pool_mode == POOL_SESSION)
+               name = "session";
+       else
+               fatal("borken mode? should not happen");
+
+       return name;
+}
+
+/*
+ * Setter for "pool_mode": match 'val' case-insensitively against the
+ * known mode names and store the result in cf_pool_mode.
+ *
+ * Returns true on success.  An unknown name is reported through
+ * admin_error() and false is returned with cf_pool_mode untouched.
+ * 'elem' is not used.
+ */
+static bool set_mode(ConfElem *elem, const char *val, PgSocket *console)
+{
+       static const struct {
+               const char *name;
+               int mode;
+       } known[] = {
+               {"session", POOL_SESSION},
+               {"transaction", POOL_TX},
+               {"statement", POOL_STMT},
+       };
+       unsigned i;
+
+       for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+               if (strcasecmp(val, known[i].name) != 0)
+                       continue;
+               cf_pool_mode = known[i].mode;
+               return true;
+       }
+
+       admin_error(console, "bad mode: %s", val);
+       return false;
+}
+
+/*
+ * Getter for "auth_type": textual name of the current cf_auth_type.
+ * 'elem' is not used.  An unknown type is reported through fatal().
+ */
+static const char *get_auth(ConfElem *elem)
+{
+       static const struct {
+               int type;
+               const char *name;
+       } names[] = {
+               {AUTH_ANY, "any"},
+               {AUTH_TRUST, "trust"},
+               {AUTH_PLAIN, "plain"},
+               {AUTH_CRYPT, "crypt"},
+               {AUTH_MD5, "md5"},
+       };
+       unsigned i;
+
+       for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+               if (names[i].type == cf_auth_type)
+                       return names[i].name;
+       }
+
+       fatal("borken auth? should not happen");
+       return NULL;
+}
+
+/*
+ * Setter for "auth_type": match 'val' case-insensitively against the
+ * known auth type names and store the result in cf_auth_type.
+ *
+ * Returns true on success.  An unknown name is reported through
+ * admin_error() and false is returned with cf_auth_type untouched.
+ * 'elem' is not used.
+ */
+static bool set_auth(ConfElem *elem, const char *val, PgSocket *console)
+{
+       static const struct {
+               const char *name;
+               int type;
+       } known[] = {
+               {"any", AUTH_ANY},
+               {"trust", AUTH_TRUST},
+               {"plain", AUTH_PLAIN},
+               {"crypt", AUTH_CRYPT},
+               {"md5", AUTH_MD5},
+               {NULL, 0}
+       };
+       int i;
+
+       for (i = 0; known[i].name != NULL; i++) {
+               if (strcasecmp(val, known[i].name) == 0) {
+                       cf_auth_type = known[i].type;
+                       return true;
+               }
+       }
+
+       admin_error(console, "bad auth type: %s", val);
+       return false;
+}
+
+/*
+ * config loading, tries to be tolerant to errors
+ *
+ * reload - false for the initial load from main(), true when called
+ *          again from the SIGHUP handler; passed through to iniparser().
+ *
+ * The result of load_auth_file() is not checked here.
+ */
+void load_config(bool reload)
+{
+       /* actual loading */
+       iniparser(cf_config_file, bouncer_config, reload);
+
+       /* load users if needed: only for auth types >= AUTH_TRUST */
+       if (cf_auth_type >= AUTH_TRUST)
+               load_auth_file(cf_auth_file);
+
+       /* reset pool_size */
+       config_postprocess();
+}
+
+/*
+ * signal handling.
+ *
+ * handle_* functions are not actual signal handlers but called from
+ * event_loop() so they have no restrictions what they can do.
+ */
+static struct event ev_sigterm;
+static struct event ev_sigint;
+static struct event ev_sigusr1;
+static struct event ev_sigusr2;
+static struct event ev_sighup;
+
+/* SIGTERM: log and leave at once with exit status 1; arguments are unused */
+static void handle_sigterm(int sock, short flags, void *arg)
+{
+       log_info("Got SIGTERM, fast exit");
+       /* pidfile cleanup happens via atexit() */
+       exit(1);
+}
+
+/*
+ * SIGINT: request shutdown by raising cf_pause_mode and cf_shutdown.
+ * Nothing is torn down here; the flags are acted on elsewhere.
+ * Arguments are unused.
+ */
+static void handle_sigint(int sock, short flags, void *arg)
+{
+       log_info("Got SIGINT, shutting down");
+       cf_pause_mode = 1;
+       cf_shutdown = 1;
+}
+
+/*
+ * SIGUSR1: enter PAUSE (cf_pause_mode = 1) unless some pause mode is
+ * already active, in which case only a note is logged.
+ * Arguments are unused.
+ */
+static void handle_sigusr1(int sock, short flags, void *arg)
+{
+       if (cf_pause_mode != 0) {
+               log_info("Got SIGUSR1, but already paused/suspended");
+               return;
+       }
+
+       log_info("Got SIGUSR1, pausing all activity");
+       cf_pause_mode = 1;
+}
+
+/*
+ * SIGUSR2: return to normal operation.
+ *
+ * From SUSPEND (cf_pause_mode == 2) resume_all() is called before the
+ * mode is cleared; from PAUSE (1) the mode is simply cleared; when not
+ * paused at all (0) only a note is logged.  Arguments are unused.
+ */
+static void handle_sigusr2(int sock, short flags, void *arg)
+{
+       switch (cf_pause_mode) {
+       case 2:
+               log_info("Got SIGUSR2, continuing from SUSPEND");
+               resume_all();
+               cf_pause_mode = 0;
+               break;
+       case 1:
+               log_info("Got SIGUSR2, continuing from PAUSE");
+               cf_pause_mode = 0;
+               break;
+       case 0:
+               /* this message used to name SIGUSR1, which misled log readers */
+               log_info("Got SIGUSR2, but not paused/suspended");
+               break;
+       }
+}
+
+/* SIGHUP: re-read the configuration via load_config(true); arguments are unused */
+static void handle_sighup(int sock, short flags, void *arg)
+{
+       log_info("Got SIGHUP re-reading config");
+       load_config(true);
+}
+
+/*
+ * Block SIGPIPE for the process and register the handle_* callbacks
+ * for SIGTERM, SIGINT, SIGUSR1, SIGUSR2 and SIGHUP, in that order.
+ * A failing sigprocmask() is reported through fatal_perror().
+ */
+static void signal_setup(void)
+{
+       static const struct {
+               struct event *ev;
+               int signo;
+               void (*handler)(int, short, void *);
+       } handlers[] = {
+               {&ev_sigterm, SIGTERM, handle_sigterm},
+               {&ev_sigint, SIGINT, handle_sigint},
+               {&ev_sigusr1, SIGUSR1, handle_sigusr1},
+               {&ev_sigusr2, SIGUSR2, handle_sigusr2},
+               {&ev_sighup, SIGHUP, handle_sighup},
+       };
+       sigset_t blocked;
+       unsigned i;
+
+       /* block SIGPIPE */
+       sigemptyset(&blocked);
+       sigaddset(&blocked, SIGPIPE);
+       if (sigprocmask(SIG_BLOCK, &blocked, NULL) < 0)
+               fatal_perror("sigprocmask");
+
+       /* install handlers */
+       for (i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++) {
+               signal_set(handlers[i].ev, handlers[i].signo, handlers[i].handler, NULL);
+               signal_add(handlers[i].ev, NULL);
+       }
+}
+
+/*
+ * daemon mode
+ */
+
+/* fork once: the parent leaves through _exit(0), only the child returns */
+static void fork_keep_child(void)
+{
+       int pid = fork();
+
+       if (pid < 0)
+               fatal_perror("fork");
+       if (pid > 0)
+               _exit(0);
+}
+
+/*
+ * Detach from the starting terminal: close stray descriptors, point
+ * stdin/stdout/stderr at /dev/null, then fork, setsid() and fork again.
+ * Refuses to run (via fatal()) when no pidfile is configured.
+ */
+static void go_daemon(void)
+{
+       int fd, target;
+
+       if (!cf_pidfile)
+               fatal("daemon needs pidfile configured");
+
+       /* just in case close all files above stderr */
+       fd = 3;
+       while (fd < OPEN_MAX) {
+               close(fd);
+               fd++;
+       }
+
+       /* send stdin, stdout, stderr to /dev/null */
+       fd = open("/dev/null", O_RDWR);
+       if (fd < 0)
+               fatal_perror("/dev/null");
+       for (target = 0; target <= 2; target++)
+               dup2(fd, target);
+       /* drop the extra descriptor unless it is itself one of 0..2 */
+       if (fd > 2)
+               close(fd);
+
+       /* first fork: continue in a fresh process */
+       fork_keep_child();
+
+       /* create new session */
+       if (setsid() < 0)
+               fatal_perror("setsid");
+
+       /* fork again to avoid being session leader */
+       fork_keep_child();
+}
+
+/*
+ * pidfile handling.
+ *
+ * check_pidfile: when a pidfile is configured and already exists on
+ * disk, quit with an error.  Nothing is written here.
+ */
+static void check_pidfile(void)
+{
+       struct stat info;
+
+       if (cf_pidfile && stat(cf_pidfile, &info) >= 0)
+               fatal("pidfile exists, another instance running?");
+}
+
+/* unlink the configured pidfile, if any; registered with atexit() by write_pidfile() */
+static void remove_pidfile(void)
+{
+       if (cf_pidfile)
+               unlink(cf_pidfile);
+}
+
+/*
+ * Create the configured pidfile and store our pid in it as decimal text.
+ *
+ * The file is opened with O_CREAT | O_EXCL, so an existing file makes
+ * the open fail, which is reported through fatal_perror().  Does
+ * nothing when no pidfile is configured.
+ */
+static void write_pidfile(void)
+{
+       char pidstr[64];
+       int fd, res;
+
+       if (cf_pidfile == NULL)
+               return;
+
+       snprintf(pidstr, sizeof(pidstr), "%u", (unsigned)getpid());
+
+       fd = open(cf_pidfile, O_WRONLY | O_CREAT | O_EXCL, 0644);
+       if (fd < 0)
+               fatal_perror(cf_pidfile);
+
+       res = safe_write(fd, pidstr, strlen(pidstr));
+       if (res < 0)
+               fatal_perror(cf_pidfile);
+       safe_close(fd);
+
+       /* only remove when we have it actually written */
+       atexit(remove_pidfile);
+}
+
+/*
+ * Pidfile and daemonization sequence for startup.
+ *
+ * In reboot mode (cf_reboot set) both pidfile steps are skipped here;
+ * main() writes the pidfile once its takeover loop has finished.
+ * The pidfile is written after go_daemon() so it holds the pid of the
+ * final, daemonized process.
+ */
+static void daemon_setup(void)
+{
+       const bool fresh_start = !cf_reboot;
+
+       if (fresh_start)
+               check_pidfile();
+
+       if (cf_daemon)
+               go_daemon();
+
+       if (fresh_start)
+               write_pidfile();
+}
+
+/*
+ * One iteration of the main loop: reset_time_cache(), one
+ * event_loop(EVLOOP_ONCE) pass, then per_loop_object_maint().
+ */
+static void main_loop_once(void)
+{
+       /* NOTE(review): presumably invalidates a cached "current time" - confirm in the helper */
+       reset_time_cache();
+       event_loop(EVLOOP_ONCE);
+       per_loop_object_maint();
+}
+
+/*
+ * boot everything
+ *
+ * Parses the command line, loads the config file, initializes the
+ * subsystems and then runs the main loop forever.
+ *
+ * Options:
+ *   -R  reboot mode: run takeover_init() instead of pooler_setup()
+ *   -v  increase verbosity (may be repeated)
+ *   -V  print version and exit with status 0
+ *   -d  run as daemon
+ *   -h  print usage and exit with status 0
+ * Exactly one non-option argument, the config file, must follow; any
+ * other command line prints usage and exits with status 1.
+ */
+int main(int argc, char *argv[])
+{
+       int c;
+
+       /* parse cmdline; getopt() is specified to return -1 when done, not EOF */
+       while ((c = getopt(argc, argv, "vhdVR")) != -1) {
+               switch (c) {
+               case 'R':
+                       cf_reboot = 1;
+                       break;
+               case 'v':
+                       cf_verbose++;
+                       break;
+               case 'V':
+                       printf("%s version %s\n", PACKAGE_NAME, PACKAGE_VERSION);
+                       return 0;
+               case 'd':
+                       cf_daemon = 1;
+                       break;
+               case 'h':
+                       /* help was asked for explicitly: not a failure */
+                       usage(0);
+                       break;
+               default:
+                       usage(1);
+                       break;
+               }
+       }
+       if (optind + 1 != argc)
+               usage(1);
+       cf_config_file = argv[optind];
+       load_config(false);
+
+       /* init random */
+       srandom(time(NULL) ^ getpid());
+
+       /* initialize subsystems, order important */
+       daemon_setup();
+       event_init();
+       signal_setup();
+       janitor_setup();
+       stats_setup();
+       admin_setup();
+
+       if (cf_reboot) {
+               /* loop until the takeover code clears cf_reboot, then claim the pidfile */
+               takeover_init();
+               while (cf_reboot)
+                       main_loop_once();
+               write_pidfile();
+       } else
+               pooler_setup();
+
+       /* main loop */
+       while (1)
+               main_loop_once();
+}
+
diff --git a/src/mbuf.h b/src/mbuf.h
new file mode 100644 (file)
index 0000000..9bb38bd
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Safe and easy access to fixed memory buffer
+ */
+
+/* read cursor over a caller-supplied byte range; the bytes are never modified */
+typedef struct MBuf MBuf;
+struct MBuf {
+       /* first byte of the range */
+       const uint8 *data;
+       /* one past the last byte of the range */
+       const uint8 *end;
+       /* next byte to be read; advanced by the mbuf_get_* functions */
+       const uint8 *pos;
+};
+
+/*
+ * Point 'buf' at the 'len' bytes starting at 'ptr' and place the read
+ * position at the start.  A negative length is reported through fatal().
+ */
+static inline void mbuf_init(MBuf *buf, const uint8 *ptr, int len)
+{
+       if (len < 0)
+               fatal("fuckup");
+
+       buf->data = ptr;
+       buf->pos = ptr;
+       buf->end = ptr + len;
+}
+
+/* read one byte and advance; running off the end is reported through fatal() */
+static inline uint8 mbuf_get_char(MBuf *buf)
+{
+       uint8 ch;
+
+       if (buf->pos >= buf->end)
+               fatal("buffer overflow");
+
+       ch = *buf->pos;
+       buf->pos++;
+       return ch;
+}
+
+/*
+ * read a 16-bit value, most significant byte first, and advance by 2;
+ * fewer than 2 bytes left is reported through fatal()
+ */
+static inline unsigned mbuf_get_uint16(MBuf *buf)
+{
+       unsigned hi, lo;
+
+       if (buf->end - buf->pos < 2)
+               fatal("buffer overflow");
+
+       hi = *buf->pos++;
+       lo = *buf->pos++;
+       return (hi << 8) | lo;
+}
+
+/*
+ * read a 32-bit value, most significant byte first, and advance by 4;
+ * fewer than 4 bytes left is reported through fatal()
+ */
+static inline unsigned mbuf_get_uint32(MBuf *buf)
+{
+       unsigned val = 0;
+       int i;
+
+       if (buf->end - buf->pos < 4)
+               fatal("buffer overflow");
+
+       for (i = 0; i < 4; i++)
+               val = (val << 8) | *buf->pos++;
+       return val;
+}
+
+/*
+ * read a 64-bit value, most significant byte first, and advance by 8.
+ *
+ * Built from two mbuf_get_uint32() reads, which also perform the
+ * bounds checks.  The return type is uint64: the former 'unsigned'
+ * return type silently dropped the upper half of the value.
+ */
+static inline uint64 mbuf_get_uint64(MBuf *buf)
+{
+       uint64 hi, lo;
+
+       /* two separate statements keep the read order well defined */
+       hi = mbuf_get_uint32(buf);
+       lo = mbuf_get_uint32(buf);
+       return (hi << 32) | lo;
+}
+
+/*
+ * return a pointer to the next 'len' bytes and advance past them;
+ * asking for more than is left is reported through fatal().
+ * The bytes are not copied: the result points into the buffer.
+ */
+static inline const uint8 * mbuf_get_bytes(MBuf *buf, unsigned len)
+{
+       const uint8 *start;
+
+       if (len > buf->end - buf->pos)
+               fatal("buffer overflow");
+
+       start = buf->pos;
+       buf->pos = start + len;
+       return start;
+}
+
+/*
+ * return the NUL-terminated string at the read position and advance
+ * past its terminator.
+ *
+ * Returns NULL when no terminator is found before the end of the
+ * buffer; the read position is then left at the end.
+ * The string is not copied: the result points into the buffer.
+ */
+static inline const char * mbuf_get_string(MBuf *buf)
+{
+       const uint8 *start = buf->pos;
+       const uint8 *cur = start;
+
+       /* scan for the terminator without leaving the buffer */
+       while (cur < buf->end && *cur != 0)
+               cur++;
+
+       if (cur == buf->end) {
+               buf->pos = cur;
+               return NULL;
+       }
+
+       buf->pos = cur + 1;
+       return (const char *)start;
+}
+
+/* number of bytes between the read position and the end of the buffer */
+static inline unsigned mbuf_avail(MBuf *buf)
+{
+       unsigned left = buf->end - buf->pos;
+
+       return left;
+}
+
+/* total number of bytes in the buffer, independent of the read position */
+static inline unsigned mbuf_size(MBuf *buf)
+{
+       unsigned total = buf->end - buf->data;
+
+       return total;
+}
+
diff --git a/src/md5.c b/src/md5.c
new file mode 100644 (file)
index 0000000..c52e340
--- /dev/null
+++ b/src/md5.c
@@ -0,0 +1,407 @@
+/*        $KAME: md5.c,v 1.3 2000/02/22 14:01:17 itojun Exp $     */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *       may be used to endorse or promote products derived from this software
+ *       without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.     IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $PostgreSQL: pgsql/contrib/pgcrypto/md5.c,v 1.13 2005/07/11 15:07:59 tgl Exp $
+ */
+
+#include "bouncer.h"
+
+#include <sys/param.h>
+
+#include "md5.h"
+
+/* sanity check */
+#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN && BYTE_ORDER != BIG_ENDIAN)
+#error Define BYTE_ORDER to be equal to either LITTLE_ENDIAN or BIG_ENDIAN
+#endif
+
+/* rotate the 32-bit value X left by s bits */
+#define SHIFT(X, s) (((X) << (s)) | ((X) >> (32 - (s))))
+
+/* the four bitwise mixing functions, one per round */
+#define F(X, Y, Z) (((X) & (Y)) | ((~X) & (Z)))
+#define G(X, Y, Z) (((X) & (Z)) | ((Y) & (~Z)))
+#define H(X, Y, Z) ((X) ^ (Y) ^ (Z))
+#define I(X, Y, Z) ((Y) ^ ((X) | (~Z)))
+
+/*
+ * One step of round N:
+ *   a = b + SHIFT(a + fn(b, c, d) + X[k] + T[i], s)
+ * with fn being F, G, H or I.  X and T are taken from the expansion
+ * site: X is the 16-word message block, T the constant table.
+ */
+#define ROUND1(a, b, c, d, k, s, i) \
+do { \
+       (a) = (a) + F((b), (c), (d)) + X[(k)] + T[(i)]; \
+       (a) = SHIFT((a), (s)); \
+       (a) = (b) + (a); \
+} while (0)
+
+#define ROUND2(a, b, c, d, k, s, i) \
+do { \
+       (a) = (a) + G((b), (c), (d)) + X[(k)] + T[(i)]; \
+       (a) = SHIFT((a), (s)); \
+       (a) = (b) + (a); \
+} while (0)
+
+#define ROUND3(a, b, c, d, k, s, i) \
+do { \
+       (a) = (a) + H((b), (c), (d)) + X[(k)] + T[(i)]; \
+       (a) = SHIFT((a), (s)); \
+       (a) = (b) + (a); \
+} while (0)
+
+#define ROUND4(a, b, c, d, k, s, i) \
+do { \
+       (a) = (a) + I((b), (c), (d)) + X[(k)] + T[(i)]; \
+       (a) = SHIFT((a), (s)); \
+       (a) = (b) + (a); \
+} while (0)
+
+/* rotation amounts used by ROUND1 */
+#define Sa      7
+#define Sb     12
+#define Sc     17
+#define Sd     22
+
+/* rotation amounts used by ROUND2 */
+#define Se      5
+#define Sf      9
+#define Sg     14
+#define Sh     20
+
+/* rotation amounts used by ROUND3 */
+#define Si      4
+#define Sj     11
+#define Sk     16
+#define Sl     23
+
+/* rotation amounts used by ROUND4 */
+#define Sm      6
+#define Sn     10
+#define So     15
+#define Sp     21
+
+/* initial values of the four state words, loaded by md5_init() */
+#define MD5_A0 0x67452301
+#define MD5_B0 0xefcdab89
+#define MD5_C0 0x98badcfe
+#define MD5_D0 0x10325476
+
+/*
+ * Integer part of 4294967296 times abs(sin(i)), where i is in radians.
+ *
+ * The table is indexed 1..64 by the ROUNDn steps; entry 0 is only a
+ * placeholder.  Two entries (0x2441453, 0x4881d05) are written with
+ * seven hex digits, i.e. with an implicit leading zero.
+ */
+static const uint32 T[65] = {
+       0,
+       0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+       0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+       0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+       0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+
+       0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+       0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8,
+       0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+       0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+
+       0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+       0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+       0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
+       0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+
+       0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+       0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+       0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+       0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+/*
+ * Padding source for md5_pad(): a single 0x80 byte followed by zeros,
+ * one full block long.
+ */
+static const uint8 md5_paddat[MD5_BUFLEN] = {
+       0x80, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static void md5_calc(uint8 *, md5_ctxt *);
+
+/*
+ * Reset 'ctxt' for a new digest: clear the block buffer, zero the bit
+ * counter and the buffer fill level, and load the initial state words.
+ */
+void
+md5_init(md5_ctxt * ctxt)
+{
+       memset(ctxt->md5_buf, 0, sizeof(ctxt->md5_buf));
+
+       ctxt->md5_sta = MD5_A0;
+       ctxt->md5_stb = MD5_B0;
+       ctxt->md5_stc = MD5_C0;
+       ctxt->md5_std = MD5_D0;
+
+       ctxt->md5_i = 0;
+       ctxt->md5_n = 0;
+}
+
+/*
+ * Feed 'len' bytes from 'input' into the digest.
+ *
+ * Bytes are collected in ctxt->md5_buf; every time a full block of
+ * MD5_BUFLEN bytes is available it is processed with md5_calc().
+ * Full blocks lying entirely inside 'input' are processed in place,
+ * and whatever remains is kept in the buffer for the next call.
+ */
+void
+md5_loop(md5_ctxt * ctxt, const uint8 *input, unsigned len)
+{
+       unsigned int gap;
+       unsigned int i;
+
+       /*
+        * byte to bit.  Widen before multiplying: in 32-bit arithmetic
+        * 'len * 8' wraps for len >= 2^29 and corrupts the 8-byte bit
+        * counter that md5_pad() appends to the message.
+        */
+       ctxt->md5_n += (uint64) len * 8;
+       gap = MD5_BUFLEN - ctxt->md5_i;
+
+       if (len >= gap)
+       {
+               /* complete the partially filled buffer and process it */
+               memmove(ctxt->md5_buf + ctxt->md5_i, input, gap);
+               md5_calc(ctxt->md5_buf, ctxt);
+
+               /* process further full blocks straight from the input */
+               for (i = gap; i + MD5_BUFLEN <= len; i += MD5_BUFLEN)
+                       md5_calc((uint8 *) (input + i), ctxt);
+
+               /* keep the tail, always shorter than one block */
+               ctxt->md5_i = len - i;
+               memmove(ctxt->md5_buf, input + i, ctxt->md5_i);
+       }
+       else
+       {
+               /* not enough for a block yet: just accumulate */
+               memmove(ctxt->md5_buf + ctxt->md5_i, input, len);
+               ctxt->md5_i += len;
+       }
+}
+
+/*
+ * Finish the message: append the 0x80 pad byte, zero fill, and the
+ * 8-byte message bit count in the last 8 bytes of a block, processing
+ * one or two final blocks with md5_calc().
+ */
+void
+md5_pad(md5_ctxt * ctxt)
+{
+       unsigned int gap;
+
+       /* Don't count up padding. Keep md5_n. */
+       gap = MD5_BUFLEN - ctxt->md5_i;
+       if (gap > 8)
+       {
+               /* pad byte and the length both fit: fill up to offset 56 */
+               memmove(ctxt->md5_buf + ctxt->md5_i, md5_paddat,
+                               gap - sizeof(ctxt->md5_n));
+       }
+       else
+       {
+               /* including gap == 8 */
+               /* no room left for the length: pad out this block, process it ... */
+               memmove(ctxt->md5_buf + ctxt->md5_i, md5_paddat, gap);
+               md5_calc(ctxt->md5_buf, ctxt);
+               /* ... and start another one with zeros up to offset 56 */
+               memmove(ctxt->md5_buf, md5_paddat + gap,
+                               MD5_BUFLEN - sizeof(ctxt->md5_n));
+       }
+
+       /* 8 byte word */
+       /* the bit count goes into bytes 56..63, least significant byte first */
+#if BYTE_ORDER == LITTLE_ENDIAN
+       memmove(&ctxt->md5_buf[56], &ctxt->md5_n8[0], 8);
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+       ctxt->md5_buf[56] = ctxt->md5_n8[7];
+       ctxt->md5_buf[57] = ctxt->md5_n8[6];
+       ctxt->md5_buf[58] = ctxt->md5_n8[5];
+       ctxt->md5_buf[59] = ctxt->md5_n8[4];
+       ctxt->md5_buf[60] = ctxt->md5_n8[3];
+       ctxt->md5_buf[61] = ctxt->md5_n8[2];
+       ctxt->md5_buf[62] = ctxt->md5_n8[1];
+       ctxt->md5_buf[63] = ctxt->md5_n8[0];
+#endif
+
+       md5_calc(ctxt->md5_buf, ctxt);
+}
+
+/*
+ * Copy the 16-byte digest out of the context into 'digest'.
+ *
+ * Each of the four 4-byte groups is emitted least significant byte
+ * first: a straight copy on little-endian hosts, a per-group byte
+ * swap on big-endian ones.
+ * NOTE(review): md5_st8 is presumably the byte view of the four state
+ * words - confirm against the union in md5.h.
+ */
+void
+md5_result(uint8 *digest, md5_ctxt * ctxt)
+{
+       /* 4 byte words */
+#if BYTE_ORDER == LITTLE_ENDIAN
+       memmove(digest, &ctxt->md5_st8[0], 16);
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+       digest[0] = ctxt->md5_st8[3];
+       digest[1] = ctxt->md5_st8[2];
+       digest[2] = ctxt->md5_st8[1];
+       digest[3] = ctxt->md5_st8[0];
+       digest[4] = ctxt->md5_st8[7];
+       digest[5] = ctxt->md5_st8[6];
+       digest[6] = ctxt->md5_st8[5];
+       digest[7] = ctxt->md5_st8[4];
+       digest[8] = ctxt->md5_st8[11];
+       digest[9] = ctxt->md5_st8[10];
+       digest[10] = ctxt->md5_st8[9];
+       digest[11] = ctxt->md5_st8[8];
+       digest[12] = ctxt->md5_st8[15];
+       digest[13] = ctxt->md5_st8[14];
+       digest[14] = ctxt->md5_st8[13];
+       digest[15] = ctxt->md5_st8[12];
+#endif
+}
+
+/*
+ * Scratch block for big-endian hosts, where md5_calc() byte-swaps its
+ * input into X first.  Being a single file-level array it is shared by
+ * all callers.
+ */
+#if BYTE_ORDER == BIG_ENDIAN
+static uint32 X[16];
+#endif
+
+/*
+ * Process one 64-byte block 'b64' and add the result into the four
+ * state words of 'ctxt'.
+ *
+ * The block is used as sixteen 32-bit words X[0..15]:
+ *  - little-endian: 'b64' is read in place through a uint32 pointer.
+ *    NOTE(review): this assumes 'b64' is suitably aligned for uint32
+ *    access; md5_loop() passes 'input + i' straight through - confirm
+ *    for strict-alignment targets.
+ *  - big-endian: each 4-byte group is byte-swapped into the static X.
+ *
+ * Four rounds of sixteen ROUNDn steps follow; the step arguments are
+ * (a, b, c, d, index into X, rotation amount, index into T).
+ */
+static void
+md5_calc(uint8 *b64, md5_ctxt * ctxt)
+{
+       uint32          A = ctxt->md5_sta;
+       uint32          B = ctxt->md5_stb;
+       uint32          C = ctxt->md5_stc;
+       uint32          D = ctxt->md5_std;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+       uint32     *X = (uint32 *) b64;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+       /* 4 byte words */
+       /* what a brute force but fast! */
+       uint8      *y = (uint8 *) X;
+
+       y[0] = b64[3];
+       y[1] = b64[2];
+       y[2] = b64[1];
+       y[3] = b64[0];
+       y[4] = b64[7];
+       y[5] = b64[6];
+       y[6] = b64[5];
+       y[7] = b64[4];
+       y[8] = b64[11];
+       y[9] = b64[10];
+       y[10] = b64[9];
+       y[11] = b64[8];
+       y[12] = b64[15];
+       y[13] = b64[14];
+       y[14] = b64[13];
+       y[15] = b64[12];
+       y[16] = b64[19];
+       y[17] = b64[18];
+       y[18] = b64[17];
+       y[19] = b64[16];
+       y[20] = b64[23];
+       y[21] = b64[22];
+       y[22] = b64[21];
+       y[23] = b64[20];
+       y[24] = b64[27];
+       y[25] = b64[26];
+       y[26] = b64[25];
+       y[27] = b64[24];
+       y[28] = b64[31];
+       y[29] = b64[30];
+       y[30] = b64[29];
+       y[31] = b64[28];
+       y[32] = b64[35];
+       y[33] = b64[34];
+       y[34] = b64[33];
+       y[35] = b64[32];
+       y[36] = b64[39];
+       y[37] = b64[38];
+       y[38] = b64[37];
+       y[39] = b64[36];
+       y[40] = b64[43];
+       y[41] = b64[42];
+       y[42] = b64[41];
+       y[43] = b64[40];
+       y[44] = b64[47];
+       y[45] = b64[46];
+       y[46] = b64[45];
+       y[47] = b64[44];
+       y[48] = b64[51];
+       y[49] = b64[50];
+       y[50] = b64[49];
+       y[51] = b64[48];
+       y[52] = b64[55];
+       y[53] = b64[54];
+       y[54] = b64[53];
+       y[55] = b64[52];
+       y[56] = b64[59];
+       y[57] = b64[58];
+       y[58] = b64[57];
+       y[59] = b64[56];
+       y[60] = b64[63];
+       y[61] = b64[62];
+       y[62] = b64[61];
+       y[63] = b64[60];
+#endif
+
+       /* round 1: F, message words taken in order */
+       ROUND1(A, B, C, D, 0, Sa, 1);
+       ROUND1(D, A, B, C, 1, Sb, 2);
+       ROUND1(C, D, A, B, 2, Sc, 3);
+       ROUND1(B, C, D, A, 3, Sd, 4);
+       ROUND1(A, B, C, D, 4, Sa, 5);
+       ROUND1(D, A, B, C, 5, Sb, 6);
+       ROUND1(C, D, A, B, 6, Sc, 7);
+       ROUND1(B, C, D, A, 7, Sd, 8);
+       ROUND1(A, B, C, D, 8, Sa, 9);
+       ROUND1(D, A, B, C, 9, Sb, 10);
+       ROUND1(C, D, A, B, 10, Sc, 11);
+       ROUND1(B, C, D, A, 11, Sd, 12);
+       ROUND1(A, B, C, D, 12, Sa, 13);
+       ROUND1(D, A, B, C, 13, Sb, 14);
+       ROUND1(C, D, A, B, 14, Sc, 15);
+       ROUND1(B, C, D, A, 15, Sd, 16);
+
+       /* round 2: G */
+       ROUND2(A, B, C, D, 1, Se, 17);
+       ROUND2(D, A, B, C, 6, Sf, 18);
+       ROUND2(C, D, A, B, 11, Sg, 19);
+       ROUND2(B, C, D, A, 0, Sh, 20);
+       ROUND2(A, B, C, D, 5, Se, 21);
+       ROUND2(D, A, B, C, 10, Sf, 22);
+       ROUND2(C, D, A, B, 15, Sg, 23);
+       ROUND2(B, C, D, A, 4, Sh, 24);
+       ROUND2(A, B, C, D, 9, Se, 25);
+       ROUND2(D, A, B, C, 14, Sf, 26);
+       ROUND2(C, D, A, B, 3, Sg, 27);
+       ROUND2(B, C, D, A, 8, Sh, 28);
+       ROUND2(A, B, C, D, 13, Se, 29);
+       ROUND2(D, A, B, C, 2, Sf, 30);
+       ROUND2(C, D, A, B, 7, Sg, 31);
+       ROUND2(B, C, D, A, 12, Sh, 32);
+
+       /* round 3: H */
+       ROUND3(A, B, C, D, 5, Si, 33);
+       ROUND3(D, A, B, C, 8, Sj, 34);
+       ROUND3(C, D, A, B, 11, Sk, 35);
+       ROUND3(B, C, D, A, 14, Sl, 36);
+       ROUND3(A, B, C, D, 1, Si, 37);
+       ROUND3(D, A, B, C, 4, Sj, 38);
+       ROUND3(C, D, A, B, 7, Sk, 39);
+       ROUND3(B, C, D, A, 10, Sl, 40);
+       ROUND3(A, B, C, D, 13, Si, 41);
+       ROUND3(D, A, B, C, 0, Sj, 42);
+       ROUND3(C, D, A, B, 3, Sk, 43);
+       ROUND3(B, C, D, A, 6, Sl, 44);
+       ROUND3(A, B, C, D, 9, Si, 45);
+       ROUND3(D, A, B, C, 12, Sj, 46);
+       ROUND3(C, D, A, B, 15, Sk, 47);
+       ROUND3(B, C, D, A, 2, Sl, 48);
+
+       /* round 4: I */
+       ROUND4(A, B, C, D, 0, Sm, 49);
+       ROUND4(D, A, B, C, 7, Sn, 50);
+       ROUND4(C, D, A, B, 14, So, 51);
+       ROUND4(B, C, D, A, 5, Sp, 52);
+       ROUND4(A, B, C, D, 12, Sm, 53);
+       ROUND4(D, A, B, C, 3, Sn, 54);
+       ROUND4(C, D, A, B, 10, So, 55);
+       ROUND4(B, C, D, A, 1, Sp, 56);
+       ROUND4(A, B, C, D, 8, Sm, 57);
+       ROUND4(D, A, B, C, 15, Sn, 58);
+       ROUND4(C, D, A, B, 6, So, 59);
+       ROUND4(B, C, D, A, 13, Sp, 60);
+       ROUND4(A, B, C, D, 4, Sm, 61);
+       ROUND4(D, A, B, C, 11, Sn, 62);
+       ROUND4(C, D, A, B, 2, So, 63);
+       ROUND4(B, C, D, A, 9, Sp, 64);
+
+       /* fold this block's result into the running state */
+       ctxt->md5_sta += A;
+       ctxt->md5_stb += B;
+       ctxt->md5_stc += C;
+       ctxt->md5_std += D;
+}
+
+/* vi: set ts=4: */
diff --git a/src/md5.h b/src/md5.h
new file mode 100644 (file)
index 0000000..58fa491
--- /dev/null
+++ b/src/md5.h
@@ -0,0 +1,82 @@
+/*     $PostgreSQL: pgsql/contrib/pgcrypto/md5.h,v 1.9 2005/10/15 02:49:06 momjian Exp $ */
+/*        $KAME: md5.h,v 1.3 2000/02/22 14:01:18 itojun Exp $     */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *       may be used to endorse or promote products derived from this software
+ *       without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.     IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETINET6_MD5_H_
+#define _NETINET6_MD5_H_
+
+#define MD5_BUFLEN     64
+#define MD5_DIGEST_LENGTH 16
+
+/*
+ * MD5 hashing context.
+ */
+typedef struct
+{
+       /* running state, viewable as four 32-bit words or as 16 raw bytes */
+       union
+       {
+               uint32          md5_state32[4];
+               uint8           md5_state8[16];
+       }                       md5_st;
+
+/* shorthand for the four state words (A, B, C, D) and for the byte view */
+#define md5_sta                md5_st.md5_state32[0]
+#define md5_stb                md5_st.md5_state32[1]
+#define md5_stc                md5_st.md5_state32[2]
+#define md5_std                md5_st.md5_state32[3]
+#define md5_st8                md5_st.md5_state8
+
+       /*
+        * 64-bit counter, also viewable as 8 raw bytes.
+        * NOTE(review): presumably the processed message length -- confirm in md5.c
+        */
+       union
+       {
+               uint64          md5_count64;
+               uint8           md5_count8[8];
+       }                       md5_count;
+/* shorthand for the two views of the counter */
+#define md5_n  md5_count.md5_count64
+#define md5_n8 md5_count.md5_count8
+
+       /* NOTE(review): presumably the fill level of md5_buf -- confirm in md5.c */
+       unsigned int md5_i;
+       /* data buffer of MD5_BUFLEN bytes */
+       uint8           md5_buf[MD5_BUFLEN];
+}      md5_ctxt;
+
+/*
+ * MD5 primitives operating on md5_ctxt.  MD5_Final() below shows the
+ * finishing order: md5_pad() first, then md5_result().
+ */
+extern void md5_init(md5_ctxt *);
+extern void md5_loop(md5_ctxt *, const uint8 *, unsigned int);
+extern void md5_pad(md5_ctxt *);
+extern void md5_result(uint8 *, md5_ctxt *);
+
+/*
+ * compatibility with OpenSSL
+ *
+ * MD5_Update() casts its data argument to void *, so any pointer type is
+ * accepted.  MD5_Final(x, y) takes the output buffer first and the context
+ * second; it calls md5_pad() on the context and then md5_result() with
+ * the output buffer.
+ */
+#define MD5_CTX                md5_ctxt
+#define MD5_Init(x)    md5_init((x))
+#define MD5_Update(x, y, z)    md5_loop((x), (void*)(y), (z))
+#define MD5_Final(x, y) \
+do {                           \
+       md5_pad((y));           \
+       md5_result((x), (y));   \
+} while (0)
+
+#endif   /* ! _NETINET6_MD5_H_ */
+
+/* vi: set ts=4: */
diff --git a/src/objects.c b/src/objects.c
new file mode 100644 (file)
index 0000000..e84445e
--- /dev/null
@@ -0,0 +1,931 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Herding objects between lists happens here.
+ */
+
+#include "bouncer.h"
+
+/*
+ * Users, databases and pools are allocated as needed and never freed.
+ * add_user(), add_database() and new_pool() insert them with
+ * put_in_order(), so these lists stay sorted.
+ */
+STATLIST(user_list);
+STATLIST(database_list);
+STATLIST(pool_list);
+
+/*
+ * Client and server socket objects are allocated on demand by
+ * new_client()/new_server() and recycled through the free lists.
+ * change_client_state()/change_server_state() keep every socket on the
+ * one list that matches its state: a free list, the login list below,
+ * or one of the per-pool lists.
+ */
+STATLIST(free_client_list);
+STATLIST(free_server_list);
+STATLIST(login_client_list);
+
+/* how many client sockets are allocated (in use + on free list) */
+static int absolute_client_count = 0;
+/* how many server sockets are allocated (in use + on free list) */
+static int absolute_server_count = 0;
+
+/*
+ * Array of users in user_list order (sorted by name), built by
+ * create_auth_cache() and searched with bsearch() in find_user().
+ * NULL while no cache exists.
+ */
+static PgUser **user_lookup = NULL;
+
+/*
+ * Throw away the user lookup array, if there is one.  Called before the
+ * user list is modified or the array is rebuilt, so that a stale array
+ * is never searched.
+ */
+static void reset_auth_cache(void)
+{
+       if (user_lookup == NULL)
+               return;
+
+       free(user_lookup);
+       user_lookup = NULL;
+}
+
+/* client sockets in use: everything allocated minus what sits on the free list */
+int get_active_client_count(void)
+{
+       int unused = statlist_count(&free_client_list);
+
+       return absolute_client_count - unused;
+}
+
+/* server sockets in use: everything allocated minus what sits on the free list */
+int get_active_server_count(void)
+{
+       int unused = statlist_count(&free_server_list);
+
+       return absolute_server_count - unused;
+}
+
+/*
+ * Reset per-connection state of a socket object that is taken from a
+ * free list and put into use again.
+ *
+ * Clears the link/pool/auth_user pointers, all status flags and the
+ * timestamps, so a recycled object starts from the same values as the
+ * fields below have in a newly allocated one.
+ */
+static void clean_socket(PgSocket *sk)
+{
+       sk->link = NULL;
+       sk->pool = NULL;
+
+       sk->wait_for_welcome = 0;
+       sk->ready = 0;
+       sk->flush_req = 0;
+       sk->admin_user = 0;
+       sk->own_user = 0;
+       sk->suspended = 0;
+       sk->wait_for_response = 0;
+
+       /*
+        * tag_dirty() may have marked the previous connection for closing;
+        * that must not leak into the new connection using this object.
+        */
+       sk->close_needed = 0;
+
+       sk->connect_time = 0;
+       sk->request_time = 0;
+       sk->query_start = 0;
+
+       sk->auth_user = NULL;
+}
+
+/*
+ * Allocate & fill client socket.
+ *
+ * Reuses the first object on free_client_list when there is one (after
+ * cleaning it), otherwise allocates a new object with cf_sbuf_len extra
+ * bytes and puts it on the free list in CL_FREE state.  The caller is
+ * expected to move the socket to its real state with
+ * change_client_state().  Returns NULL when allocation fails.
+ */
+static PgSocket *new_client(void)
+{
+       PgSocket *sk = first_socket(&free_client_list);
+
+       if (sk != NULL) {
+               /* recycled object, wipe what the previous user left behind */
+               clean_socket(sk);
+               return sk;
+       }
+
+       /* free list is empty, need a brand new object */
+       sk = zmalloc(sizeof(*sk) + cf_sbuf_len);
+       if (sk == NULL)
+               return NULL;
+
+       list_init(&sk->head);
+       sbuf_init(&sk->sbuf, client_proto, sk);
+       sk->state = CL_FREE;
+       statlist_prepend(&sk->head, &free_client_list);
+       absolute_client_count++;
+
+       return sk;
+}
+
+/*
+ * Allocate & fill server socket.
+ *
+ * Reuses the first object on free_server_list when there is one (after
+ * cleaning it), otherwise allocates a new object with cf_sbuf_len extra
+ * bytes and puts it on the free list in SV_FREE state.  The caller is
+ * expected to move the socket to its real state with
+ * change_server_state().  Returns NULL when allocation fails.
+ */
+static PgSocket *new_server(void)
+{
+       PgSocket *sk = first_socket(&free_server_list);
+
+       if (sk != NULL) {
+               /* recycled object, wipe what the previous user left behind */
+               clean_socket(sk);
+               return sk;
+       }
+
+       /* free list is empty, need a brand new object */
+       sk = zmalloc(sizeof(*sk) + cf_sbuf_len);
+       if (sk == NULL)
+               return NULL;
+
+       list_init(&sk->head);
+       sbuf_init(&sk->sbuf, server_proto, sk);
+       sk->state = SV_FREE;
+       statlist_prepend(&sk->head, &free_server_list);
+       absolute_server_count++;
+
+       return sk;
+}
+
+/*
+ * Change client state, which means moving it between lists.
+ *
+ * The client is unlinked from the list that belongs to its current state
+ * and linked into the list that belongs to newstate.  CL_FREE and
+ * CL_LOGIN use the global lists, all other states use lists of
+ * client->pool.  An unknown state on either side is reported via fatal().
+ */
+void change_client_state(PgSocket *client, SocketState newstate)
+{
+       PgPool *pool = client->pool;
+       StatList *from = NULL;
+       StatList *to = NULL;
+
+       /* locate the list the client sits on now */
+       switch (client->state) {
+       case CL_FREE:
+               from = &free_client_list;
+               break;
+       case CL_LOGIN:
+               from = &login_client_list;
+               break;
+       case CL_WAITING:
+               from = &pool->waiting_client_list;
+               break;
+       case CL_ACTIVE:
+               from = &pool->active_client_list;
+               break;
+       case CL_CANCEL:
+               from = &pool->cancel_req_list;
+               break;
+       default:
+               fatal("bad cur client state: %d", client->state);
+       }
+       if (from)
+               statlist_remove(&client->head, from);
+
+       client->state = newstate;
+
+       /* locate the list for the new state */
+       switch (client->state) {
+       case CL_FREE:
+               to = &free_client_list;
+               break;
+       case CL_LOGIN:
+               to = &login_client_list;
+               break;
+       case CL_WAITING:
+               to = &pool->waiting_client_list;
+               break;
+       case CL_ACTIVE:
+               to = &pool->active_client_list;
+               break;
+       case CL_CANCEL:
+               to = &pool->cancel_req_list;
+               break;
+       default:
+               fatal("bad new client state: %d", client->state);
+       }
+       if (!to)
+               return;
+
+       /* free list is LIFO to keep the cache warm, everything else is FIFO */
+       if (client->state == CL_FREE)
+               statlist_prepend(&client->head, to);
+       else
+               statlist_append(&client->head, to);
+}
+
+/*
+ * Change server state, which means moving it between lists.
+ *
+ * The server is unlinked from the list that belongs to its current state
+ * and linked into the list that belongs to newstate.  SV_FREE uses the
+ * global free list, all other states use lists of server->pool.  An
+ * unknown state on either side is reported via fatal().
+ */
+void change_server_state(PgSocket *server, SocketState newstate)
+{
+       PgPool *pool = server->pool;
+       StatList *from = NULL;
+       StatList *to = NULL;
+       int lifo = 0;
+
+       /* locate the list the server sits on now */
+       switch (server->state) {
+       case SV_FREE:
+               from = &free_server_list;
+               break;
+       case SV_LOGIN:
+               from = &pool->new_server_list;
+               break;
+       case SV_USED:
+               from = &pool->used_server_list;
+               break;
+       case SV_TESTED:
+               from = &pool->tested_server_list;
+               break;
+       case SV_IDLE:
+               from = &pool->idle_server_list;
+               break;
+       case SV_ACTIVE:
+               from = &pool->active_server_list;
+               break;
+       default:
+               fatal("change_server_state: bad old server state: %d", server->state);
+       }
+       if (from)
+               statlist_remove(&server->head, from);
+
+       server->state = newstate;
+
+       /* locate the list for the new state and decide which end to use */
+       switch (server->state) {
+       case SV_FREE:
+               /* LIFO to keep cache warm */
+               to = &free_server_list;
+               lifo = 1;
+               break;
+       case SV_LOGIN:
+               to = &pool->new_server_list;
+               break;
+       case SV_USED:
+               /* again, LIFO */
+               to = &pool->used_server_list;
+               lifo = 1;
+               break;
+       case SV_TESTED:
+               to = &pool->tested_server_list;
+               break;
+       case SV_IDLE:
+               /*
+                * LIFO normally, but a server tagged with close_needed
+                * goes to the tail to avoid immediate usage.
+                */
+               to = &pool->idle_server_list;
+               lifo = !server->close_needed;
+               break;
+       case SV_ACTIVE:
+               to = &pool->active_server_list;
+               break;
+       default:
+               fatal("bad server state");
+       }
+       if (!to)
+               return;
+
+       if (lifo)
+               statlist_prepend(&server->head, to);
+       else
+               statlist_append(&server->head, to);
+}
+
+/*
+ * Ordering callback for put_in_order(): pools are ordered by database
+ * name, and pools of the same database object by user name.
+ */
+static int cmp_pool(List *i1, List *i2)
+{
+       PgPool *left = container_of(i1, PgPool, head);
+       PgPool *right = container_of(i2, PgPool, head);
+
+       if (left->db == right->db) {
+               if (left->user == right->user)
+                       return 0;
+               return strcmp(left->user->name, right->user->name);
+       }
+       return strcmp(left->db->name, right->db->name);
+}
+
+/* ordering callback for put_in_order(): users are ordered by name */
+static int cmp_user(List *i1, List *i2)
+{
+       const char *left = container_of(i1, PgUser, head)->name;
+       const char *right = container_of(i2, PgUser, head)->name;
+
+       return strcmp(left, right);
+}
+
+/* ordering callback for put_in_order(): databases are ordered by name */
+static int cmp_database(List *i1, List *i2)
+{
+       const char *left = container_of(i1, PgDatabase, head)->name;
+       const char *right = container_of(i2, PgDatabase, head)->name;
+
+       return strcmp(left, right);
+}
+
+/*
+ * Insert newitem into an already sorted list at its correct position.
+ *
+ * The list is walked from the start and newitem is placed in front of
+ * the first element that cmpfn reports as larger; when there is none it
+ * is appended.  Finding an element equal to newitem is a fatal error.
+ */
+static void put_in_order(List *newitem, StatList *list, int (*cmpfn)(List *, List *))
+{
+       List *cur;
+
+       statlist_for_each(cur, list) {
+               int order = cmpfn(cur, newitem);
+
+               if (order > 0) {
+                       statlist_put_before(newitem, list, cur);
+                       return;
+               }
+               if (order == 0)
+                       fatal("put_in_order: found existing elem");
+       }
+
+       /* nothing larger found, newitem goes last */
+       statlist_append(newitem, list);
+}
+
+/*
+ * Return the database object called name, creating and registering it in
+ * database_list first when it does not exist yet.  Returns NULL only
+ * when a new object is needed and allocation fails.
+ */
+PgDatabase *add_database(const char *name)
+{
+       PgDatabase *db;
+
+       db = find_database(name);
+       if (db != NULL)
+               return db;
+
+       /* not known yet, create it */
+       db = zmalloc(sizeof(*db));
+       if (db == NULL)
+               return NULL;
+
+       list_init(&db->head);
+       strlcpy(db->name, name, sizeof(db->name));
+       put_in_order(&db->head, &database_list, cmp_database);
+
+       return db;
+}
+
+/*
+ * Add or update a client user.
+ *
+ * An existing user only gets the new password; an unknown one is created
+ * and inserted into user_list.  The lookup cache is dropped in either
+ * case.  Returns NULL when a new object is needed and allocation fails.
+ */
+PgUser *add_user(const char *name, const char *passwd)
+{
+       PgUser *user;
+
+       /* look up first, the search may still use the cache */
+       user = find_user(name);
+       reset_auth_cache();
+
+       if (!user) {
+               user = zmalloc(sizeof(*user));
+               if (user == NULL)
+                       return NULL;
+
+               list_init(&user->head);
+               list_init(&user->pool_list);
+               strlcpy(user->name, name, sizeof(user->name));
+               put_in_order(&user->head, &user_list, cmp_user);
+       }
+
+       strlcpy(user->passwd, passwd, sizeof(user->passwd));
+
+       return user;
+}
+
+/*
+ * Set the forced user of a database: a separate user object, not part of
+ * user_list, for storing server user info.
+ *
+ * The object is allocated on first use and afterwards only its name and
+ * password are overwritten.  Returns NULL when allocation fails.
+ */
+PgUser *force_user(PgDatabase *db, const char *name, const char *passwd)
+{
+       PgUser *forced = db->forced_user;
+
+       if (forced == NULL) {
+               forced = zmalloc(sizeof(*forced));
+               if (forced == NULL)
+                       return NULL;
+
+               list_init(&forced->head);
+               list_init(&forced->pool_list);
+       }
+
+       strlcpy(forced->name, name, sizeof(forced->name));
+       strlcpy(forced->passwd, passwd, sizeof(forced->passwd));
+       db->forced_user = forced;
+
+       return forced;
+}
+
+/* find an existing database by name with a linear scan, NULL if unknown */
+PgDatabase *find_database(const char *name)
+{
+       List *node;
+
+       statlist_for_each(node, &database_list) {
+               PgDatabase *cur = container_of(node, PgDatabase, head);
+
+               if (!strcmp(cur->name, name))
+                       return cur;
+       }
+
+       return NULL;
+}
+
+/*
+ * bsearch() callback: the key is a plain name string, the array element
+ * is a pointer to PgUser whose ->name is compared against it.
+ */
+static int user_name_cmp(const void *namestr, const void *userptr)
+{
+       const char *wanted = namestr;
+       const PgUser *candidate = *(const PgUser * const *)userptr;
+
+       return strcmp(wanted, candidate->name);
+}
+
+/*
+ * Find an existing user by name, NULL if unknown.
+ *
+ * Uses binary search over the lookup array when it exists, otherwise
+ * falls back to walking user_list.
+ */
+PgUser *find_user(const char *name)
+{
+       PgUser **hit;
+       List *node;
+
+       if (user_lookup == NULL) {
+               /* no lookup table, slow scan */
+               statlist_for_each(node, &user_list) {
+                       PgUser *cur = container_of(node, PgUser, head);
+
+                       if (!strcmp(cur->name, name))
+                               return cur;
+               }
+               return NULL;
+       }
+
+       /* lookup table is available, use faster method */
+       hit = bsearch(name, user_lookup,
+                     statlist_count(&user_list),
+                     sizeof(PgUser *),
+                     user_name_cmp);
+       if (hit == NULL)
+               return NULL;
+       return *hit;
+}
+
+/*
+ * (Re)build the lookup array used by find_user().
+ *
+ * Any previous array is dropped, then user_list is copied into a newly
+ * allocated array in list order.  When allocation fails there is simply
+ * no cache and find_user() keeps using the slow path.
+ */
+void create_auth_cache(void)
+{
+       List *node;
+       PgUser **slot;
+
+       reset_auth_cache();
+
+       user_lookup = malloc(statlist_count(&user_list) * sizeof(PgUser *));
+       if (user_lookup == NULL)
+               return;
+
+       slot = user_lookup;
+       statlist_for_each(node, &user_list)
+               *slot++ = container_of(node, PgUser, head);
+}
+
+/*
+ * Create the pool object for a db/user pair.
+ *
+ * The pool is linked into user->pool_list (searched by get_pool()) and
+ * into the global pool_list.  Returns NULL when allocation fails.
+ */
+static PgPool *new_pool(PgDatabase *db, PgUser *user)
+{
+       PgPool *pool = zmalloc(sizeof(*pool));
+
+       if (pool == NULL)
+               return NULL;
+
+       pool->db = db;
+       pool->user = user;
+
+       list_init(&pool->head);
+       list_init(&pool->map_head);
+
+       /* client side lists */
+       statlist_init(&pool->active_client_list, "active_client_list");
+       statlist_init(&pool->waiting_client_list, "waiting_client_list");
+       statlist_init(&pool->cancel_req_list, "cancel_req_list");
+
+       /* server side lists */
+       statlist_init(&pool->active_server_list, "active_server_list");
+       statlist_init(&pool->idle_server_list, "idle_server_list");
+       statlist_init(&pool->tested_server_list, "tested_server_list");
+       statlist_init(&pool->used_server_list, "used_server_list");
+       statlist_init(&pool->new_server_list, "new_server_list");
+
+       /* make it findable through the user */
+       list_append(&pool->map_head, &user->pool_list);
+
+       /* keep pools in db/user order to make stats faster */
+       put_in_order(&pool->head, &pool_list, cmp_pool);
+
+       return pool;
+}
+
+/*
+ * Find the pool for a db/user pair, creating it when it does not exist.
+ * Returns NULL when either argument is NULL or pool creation fails.
+ */
+PgPool *get_pool(PgDatabase *db, PgUser *user)
+{
+       List *node;
+
+       if (db == NULL || user == NULL)
+               return NULL;
+
+       /* the user knows all of its pools, look for the one on this db */
+       list_for_each(node, &user->pool_list) {
+               PgPool *candidate = container_of(node, PgPool, map_head);
+
+               if (candidate->db == db)
+                       return candidate;
+       }
+
+       return new_pool(db, user);
+}
+
+/*
+ * Deactivate socket and put it into the wait queue.
+ *
+ * The client must be in CL_ACTIVE state.  It is moved to the
+ * waiting_client_list of its pool and sbuf_pause() is called on its
+ * buffer.
+ */
+void pause_client(PgSocket *client)
+{
+       Assert(client->state == CL_ACTIVE);
+
+       slog_debug(client, "pause_client");
+       change_client_state(client, CL_WAITING);
+       sbuf_pause(&client->sbuf);
+}
+
+/*
+ * Wake client from wait.
+ *
+ * The client must be in CL_WAITING state.  It is moved back to the
+ * active_client_list of its pool and sbuf_continue() is called on its
+ * buffer.
+ */
+void activate_client(PgSocket *client)
+{
+       Assert(client->state == CL_WAITING);
+
+       slog_debug(client, "activate_client");
+       change_client_state(client, CL_ACTIVE);
+       sbuf_continue(&client->sbuf);
+}
+
+/*
+ * Make sure an active client has a server linked.
+ *
+ * Returns true when the client already has a link or an idle server of
+ * its pool could be linked (that server becomes SV_ACTIVE).  Otherwise,
+ * or always when cf_pause_mode is 1, the client is put into the wait
+ * queue and false is returned.
+ */
+bool find_server(PgSocket *client)
+{
+       PgPool *pool = client->pool;
+       PgSocket *server = NULL;
+
+       Assert(client->state == CL_ACTIVE);
+
+       /* already linked, nothing to do */
+       if (client->link)
+               return true;
+
+       /* try to get idle server, if allowed */
+       if (cf_pause_mode != 1)
+               server = first_socket(&pool->idle_server_list);
+
+       if (server == NULL) {
+               /* nothing available, send to waiters list */
+               pause_client(client);
+               Assert(client->state == CL_WAITING);
+               return false;
+       }
+
+       /* link both ways */
+       Assert(server->state == SV_IDLE);
+       client->link = server;
+       server->link = client;
+       change_server_state(server, SV_ACTIVE);
+       return true;
+}
+
+/*
+ * connecting/active -> idle, unlink if needed
+ *
+ * The server must have ->ready set.  An active server is unlinked from
+ * its client; it goes to SV_USED instead of SV_IDLE when
+ * cf_server_check_delay is 0 and a check query is configured.  Releasing
+ * a server that is in SV_LOGIN clears the pool's last_connect_failed
+ * flag.  Any other state than ACTIVE/USED/TESTED/LOGIN is fatal.
+ */
+void release_server(PgSocket *server)
+{
+       PgPool *pool = server->pool;
+       SocketState newstate = SV_IDLE;
+
+       /* btw, this function is not allowed to disconnect,
+          as there may be packet pending */
+       Assert(server->ready);
+
+       /* remove from old list */
+       switch (server->state) {
+       case SV_ACTIVE:
+               /* break the client <-> server link in both directions */
+               server->link->link = NULL;
+               server->link = NULL;
+
+               if (cf_server_check_delay == 0 && *cf_server_check_query)
+                       newstate = SV_USED;
+               /* fallthrough */
+       case SV_USED:
+       case SV_TESTED:
+               break;
+       case SV_LOGIN:
+               pool->last_connect_failed = 0;
+               break;
+       default:
+               fatal("bad server state in release_server");
+       }
+
+       Assert(server->link == NULL);
+
+       log_debug("release_server: new state=%d", newstate);
+
+       change_server_state(server, newstate);
+}
+
+/*
+ * Drop server connection.
+ *
+ * If the server is active and linked, the link is broken and the client
+ * is disconnected too, with the same reason.  When notify is set a
+ * terminate packet ('X') is sent before closing, except for a server in
+ * SV_LOGIN that is already ->ready.  A server in SV_LOGIN that is not
+ * ready marks the pool with last_connect_failed.  Finally the socket is
+ * closed and the object returns to the free list.
+ */
+void disconnect_server(PgSocket *server, bool notify, const char *reason)
+{
+       PgPool *pool = server->pool;
+       PgSocket *client = server->link;
+       /* message type 'X' followed by 4-byte length, which is 4 */
+       static const uint8 pkt_term[] = {'X', 0,0,0,4};
+       int send_term = 1;
+
+       log_debug("disconnect_server");
+       slog_info(server, "closing because: %s", reason);
+
+       switch (server->state) {
+       case SV_ACTIVE:
+               client = server->link;
+               if (client) {
+                       /*
+                        * unlink before disconnecting the client, so that
+                        * disconnect_client() does not touch this server again
+                        */
+                       client->link = NULL;
+                       server->link = NULL;
+                       disconnect_client(client, true, reason);
+               }
+               break;
+       case SV_TESTED:
+       case SV_USED:
+       case SV_IDLE:
+               break;
+       case SV_LOGIN:
+               /*
+                * usually disconnect means problems in startup phase,
+                * except when sending cancel packet
+                */
+               if (!server->ready)
+                       pool->last_connect_failed = 1;
+               else
+                       send_term = 0;
+               break;
+       default:
+               fatal("disconnect_server: bad server state");
+       }
+
+       Assert(server->link == NULL);
+
+       /* notify server and close connection */
+       if (send_term && notify)
+               sbuf_answer(&server->sbuf, pkt_term, sizeof(pkt_term));
+       sbuf_close(&server->sbuf);
+
+       change_server_state(server, SV_FREE);
+}
+
+/*
+ * Drop client connection.
+ *
+ * A linked server of an active client is released back to the pool when
+ * it is ->ready, otherwise it is unlinked and disconnected as well.
+ * When notify is set and reason is not NULL, the reason is sent to the
+ * client as an error before the socket is closed.  Finally the object
+ * returns to the free list.
+ */
+void disconnect_client(PgSocket *client, bool notify, const char *reason)
+{
+       slog_debug(client, "closing because: %s", reason);
+
+       switch (client->state) {
+       case CL_ACTIVE:
+               if (client->link) {
+                       PgSocket *server = client->link;
+                       if (server->ready) {
+                               /* release_server() also clears both link pointers */
+                               release_server(server);
+                       } else {
+                               /*
+                                * unlink first, so that disconnect_server()
+                                * does not come back to this client
+                                */
+                               server->link = NULL;
+                               client->link = NULL;
+                               disconnect_server(server, true, "unclean server");
+                       }
+               }
+               /* fallthrough */
+       case CL_LOGIN:
+       case CL_WAITING:
+       case CL_CANCEL:
+               break;
+       default:
+               fatal("bad client state in disconnect_client: %d", client->state);
+       }
+
+       /* send reason to client */
+       if (notify && reason) {
+               /*
+                * don't send Ready pkt here, or client won't notice
+                * closed connection
+                */
+               send_pooler_error(client, false, reason);
+       }
+
+       sbuf_close(&client->sbuf);
+
+       change_client_state(client, CL_FREE);
+}
+
+/*
+ * The pool needs a new server connection; start one if possible.
+ *
+ * Nothing is launched when a login is already in progress for the pool,
+ * when the last attempt failed less than cf_server_login_retry ago, when
+ * the pool is full (checked only once the db has its welcome message)
+ * or when no socket object can be had.
+ */
+void launch_new_connection(PgPool *pool)
+{
+       PgSocket *server;
+       int in_use;
+
+       /* allow only small number of connection attempts at a time */
+       if (!statlist_empty(&pool->new_server_list)) {
+               log_debug("launch_new_connection: already progress");
+               return;
+       }
+
+       /* if server bounces, don't retry too fast */
+       if (pool->last_connect_failed) {
+               usec_t cur = get_cached_time();
+
+               if (cur - pool->last_connect_time < cf_server_login_retry) {
+                       log_debug("launch_new_connection: last failed, wait");
+                       return;
+               }
+       }
+
+       /* is it allowed to add servers? */
+       in_use = pool_server_count(pool);
+       if (pool->db->welcome_msg_ready && in_use >= pool->db->pool_size) {
+               log_debug("launch_new_connection: pool full (%d >= %d)",
+                               in_use, pool->db->pool_size);
+               return;
+       }
+
+       /* get free conn object */
+       server = new_server();
+       if (server == NULL) {
+               log_debug("launch_new_connection: no mem");
+               return;
+       }
+
+       /* bind it to the pool and remember when the attempt started */
+       server->pool = pool;
+       server->auth_user = pool->user;
+       server->addr = pool->db->addr;
+       server->connect_time = get_cached_time();
+       pool->last_connect_time = get_cached_time();
+       change_server_state(server, SV_LOGIN);
+
+       /* start connecting */
+       slog_info(server, "new connection to server");
+       sbuf_connect(&server->sbuf, &server->addr, cf_server_connect_timeout / USEC);
+}
+
+/*
+ * New client connection attempt.
+ *
+ * Takes a socket object, records connect time and peer address (zeroed
+ * when addr is NULL), puts the client into CL_LOGIN state and hands the
+ * fd to its sbuf.  Returns NULL when no socket object is available.
+ */
+PgSocket * accept_client(int sock,
+                        const struct sockaddr_in *addr,
+                        bool is_unix)
+{
+       PgSocket *client = new_client();
+
+       if (client == NULL)
+               return NULL;
+
+       client->connect_time = client->request_time = get_cached_time();
+       client->query_start = 0;
+
+       if (addr == NULL) {
+               memset(&client->addr, 0, sizeof(client->addr));
+       } else {
+               client->addr.ip_addr = addr->sin_addr;
+               client->addr.port = ntohs(addr->sin_port);
+       }
+       client->addr.is_unix = is_unix;
+
+       change_client_state(client, CL_LOGIN);
+
+       slog_debug(client, "got connection attempt");
+       sbuf_accept(&client->sbuf, sock, is_unix);
+
+       return client;
+}
+
+/*
+ * Client managed to authenticate, send welcome msg and accept queries.
+ *
+ * A client in CL_LOGIN is made CL_ACTIVE first; other states than
+ * LOGIN/ACTIVE are fatal.  When welcome_client() fails, the client is
+ * flagged with wait_for_welcome and paused, a new server connection is
+ * launched (unless cf_pause_mode is set) and false is returned.
+ * Returns true when the client is logged in.
+ */
+bool finish_client_login(PgSocket *client)
+{
+       switch (client->state) {
+       case CL_LOGIN:
+               change_client_state(client, CL_ACTIVE);
+               /* fallthrough */
+       case CL_ACTIVE:
+               break;
+       default:
+               fatal("bad client state");
+       }
+
+       if (!welcome_client(client)) {
+               log_debug("finish_client_login: no welcome msg, pause");
+               client->wait_for_welcome = 1;
+               pause_client(client);
+               if (!cf_pause_mode)
+                       launch_new_connection(client->pool);
+               return false;
+       }
+       client->wait_for_welcome = 0;
+
+       slog_debug(client, "logged in");
+       return true;
+}
+
+/*
+ * Handle a cancel request; req->cancel_key has the requested client key.
+ *
+ * The active clients of all pools are searched for the key:
+ *  - no match: the request is dropped.
+ *  - match without linked server: both that client and the request are
+ *    dropped.
+ *  - match with linked server: the request's socket is closed, the
+ *    request object takes over the server's cancel key, is attached to
+ *    the matching client's pool in CL_CANCEL state, and a new server
+ *    connection is launched for that pool (forward_cancel_request()
+ *    picks the request up from the pool's cancel_req_list).
+ */
+void accept_cancel_request(PgSocket *req)
+{
+       List *pitem, *citem;
+       PgPool *pool = NULL;
+       PgSocket *server = NULL, *client, *main_client = NULL;
+
+       Assert(req->state == CL_LOGIN);
+
+       /* find real client this is for */
+       statlist_for_each(pitem, &pool_list) {
+               pool = container_of(pitem, PgPool, head);
+               statlist_for_each(citem, &pool->active_client_list) {
+                       client = container_of(citem, PgSocket, head);
+                       if (memcmp(client->cancel_key, req->cancel_key, 8) == 0) {
+                               main_client = client;
+                               break;
+                       }
+               }
+               /*
+                * the break above leaves only the inner loop; stop here
+                * too, so that 'pool' stays the pool of the found client
+                */
+               if (main_client)
+                       break;
+       }
+
+       /* wrong key */
+       if (!main_client) {
+               disconnect_client(req, false, "failed cancel req");
+               return;
+       }
+
+       /* not linked client, just drop it then */
+       if (!main_client->link) {
+               disconnect_client(main_client, true, "canceling idle client");
+               disconnect_client(req, false, "cancel req for idle client");
+               return;
+       }
+
+       /* drop the connection silently */
+       sbuf_close(&req->sbuf);
+
+       /* remember server key */
+       server = main_client->link;
+       memcpy(req->cancel_key, server->cancel_key, 8);
+
+       /*
+        * attach to the target pool; change_client_state() needs
+        * req->pool both now and later when the request is freed
+        */
+       req->pool = pool;
+       change_client_state(req, CL_CANCEL);
+
+       launch_new_connection(pool);
+}
+
+/*
+ * Send the first pending cancel request of the server's pool over a
+ * server connection that is in SV_LOGIN state, then free the request
+ * object.
+ *
+ * NOTE(review): 'res' is handed to SEND_CancelRequest() but never
+ * checked afterwards -- confirm that ignoring a failed send is intended.
+ */
+void forward_cancel_request(PgSocket *server)
+{
+       bool res;
+       PgSocket *req = first_socket(&server->pool->cancel_req_list);
+
+       Assert(req != NULL && req->state == CL_CANCEL);
+       Assert(server->state == SV_LOGIN);
+
+       SEND_CancelRequest(res, server, req->cancel_key);
+
+       change_client_state(req, CL_FREE);
+}
+
+/*
+ * Register an already connected client fd as an active, suspended
+ * client.
+ *
+ * fd/addr describe the socket, dbname/username select the pool, ckey is
+ * the client's old cancel key, oldfd/linkfd are stored in the
+ * tmp_sk_oldfd/tmp_sk_linkfd fields.  Returns false when the pool cannot
+ * be found or created, when no socket object is available or when
+ * set_pool() fails.
+ */
+bool use_client_socket(int fd, PgAddr *addr,
+                      const char *dbname, const char *username,
+                      uint64 ckey, int oldfd, int linkfd)
+{
+       PgDatabase *db = find_database(dbname);
+       PgUser *user = find_user(username);
+       PgPool *pool = get_pool(db, user);
+       PgSocket *client;
+       PktBuf tmp;
+
+       /* unknown db or user, or no memory for the pool */
+       if (!pool)
+               return false;
+
+       client = accept_client(fd, NULL, addr->is_unix);
+       /* accept_client() returns NULL when no socket object can be allocated */
+       if (!client)
+               return false;
+       client->addr = *addr;
+       client->suspended = 1;
+
+       if (!set_pool(client, dbname, username))
+               return false;
+
+       change_client_state(client, CL_ACTIVE);
+
+       /* store old cancel key */
+       pktbuf_static(&tmp, client->cancel_key, 8);
+       pktbuf_put_uint64(&tmp, ckey);
+
+       /* store old fds */
+       client->tmp_sk_oldfd = oldfd;
+       client->tmp_sk_linkfd = linkfd;
+
+       return true;
+}
+
+/*
+ * Register an already connected server fd as a suspended server of the
+ * matching pool.
+ *
+ * fd/addr describe the socket, dbname selects the database and
+ * username the user (the database's forced user wins when there is
+ * one).  ckey is the server's old cancel key, oldfd/linkfd are stored in
+ * the tmp_sk_oldfd/tmp_sk_linkfd fields; a non-zero linkfd makes the
+ * server SV_ACTIVE, otherwise it becomes SV_IDLE.  Returns false when
+ * the database or pool cannot be found or no socket object is available.
+ */
+bool use_server_socket(int fd, PgAddr *addr,
+                      const char *dbname, const char *username,
+                      uint64 ckey, int oldfd, int linkfd)
+{
+       PgDatabase *db = find_database(dbname);
+       PgUser *user;
+       PgPool *pool;
+       PgSocket *server;
+       PktBuf tmp;
+
+       /* find_database() returns NULL for an unknown name */
+       if (!db)
+               return false;
+
+       if (db->forced_user)
+               user = db->forced_user;
+       else
+               user = find_user(username);
+
+       pool = get_pool(db, user);
+       if (!pool)
+               return false;
+
+       server = new_server();
+       if (!server)
+               return false;
+
+       sbuf_accept(&server->sbuf, fd, addr->is_unix);
+       server->suspended = 1;
+       server->pool = pool;
+       server->auth_user = user;
+       server->addr = *addr;
+       server->connect_time = server->request_time = get_cached_time();
+       server->query_start = 0;
+
+       if (linkfd)
+               change_server_state(server, SV_ACTIVE);
+       else
+               change_server_state(server, SV_IDLE);
+
+       /* store old cancel key */
+       pktbuf_static(&tmp, server->cancel_key, 8);
+       pktbuf_put_uint64(&tmp, ckey);
+
+       /* store old fds */
+       server->tmp_sk_oldfd = oldfd;
+       server->tmp_sk_linkfd = linkfd;
+
+       return true;
+}
+
+/*
+ * Call func for every server socket of the pool, going through the
+ * idle, used, tested, active and new server lists in that order.
+ */
+void for_each_server(PgPool *pool, void (*func)(PgSocket *sk))
+{
+       StatList *lists[5];
+       List *item;
+       int i;
+
+       lists[0] = &pool->idle_server_list;
+       lists[1] = &pool->used_server_list;
+       lists[2] = &pool->tested_server_list;
+       lists[3] = &pool->active_server_list;
+       lists[4] = &pool->new_server_list;
+
+       for (i = 0; i < 5; i++) {
+               statlist_for_each(item, lists[i])
+                       func(container_of(item, PgSocket, head));
+       }
+}
+
+/* for_each_server() callback: mark the socket with close_needed */
+static void tag_dirty(PgSocket *sk)
+{
+       sk->close_needed = 1;
+}
+
+/* mark every server socket in every pool of the given database with close_needed */
+void tag_database_dirty(PgDatabase *db)
+{
+       List *node;
+
+       statlist_for_each(node, &pool_list) {
+               PgPool *cur = container_of(node, PgPool, head);
+
+               if (cur->db == db)
+                       for_each_server(cur, tag_dirty);
+       }
+}
+
+
diff --git a/src/objects.h b/src/objects.h
new file mode 100644 (file)
index 0000000..9d95ad5
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* global object lists; each one is declared exactly once */
+extern StatList user_list;
+extern StatList pool_list;
+extern StatList database_list;
+extern StatList login_client_list;
+extern StatList free_server_list;
+extern StatList free_client_list;
+
+PgDatabase *find_database(const char *name);
+PgUser *find_user(const char *name);
+PgPool *get_pool(PgDatabase *, PgUser *);
+bool find_server(PgSocket *client);
+void release_server(PgSocket *server);
+bool finish_client_login(PgSocket *client);
+
+PgSocket * accept_client(int sock, const struct sockaddr_in *addr, bool is_unix);
+void disconnect_server(PgSocket *server, bool notify, const char *reason);
+void disconnect_client(PgSocket *client, bool notify, const char *reason);
+
+PgDatabase * add_database(const char *name);
+PgUser * add_user(const char *name, const char *passwd);
+PgUser * force_user(PgDatabase *db, const char *username, const char *passwd);
+
+void accept_cancel_request(PgSocket *req);
+void forward_cancel_request(PgSocket *server);
+
+void launch_new_connection(PgPool *pool);
+
+bool use_client_socket(int fd, PgAddr *addr, const char *dbname, const char *username, uint64 ckey, int oldfd, int linkfd);
+bool use_server_socket(int fd, PgAddr *addr, const char *dbname, const char *username, uint64 ckey, int oldfd, int linkfd);
+
+void pause_client(PgSocket *client);
+void activate_client(PgSocket *client);
+
+void change_client_state(PgSocket *client, SocketState newstate);
+void change_server_state(PgSocket *server, SocketState newstate);
+
+int get_active_client_count(void);
+int get_active_server_count(void);
+
+void tag_database_dirty(PgDatabase *db);
+void for_each_server(PgPool *pool, void (*func)(PgSocket *sk));
+
+void create_auth_cache(void);
+
diff --git a/src/pktbuf.c b/src/pktbuf.c
new file mode 100644 (file)
index 0000000..ba65042
--- /dev/null
@@ -0,0 +1,405 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Packet writing and sending.
+ */
+
+#include "bouncer.h"
+
+/*
+ * Release a dynamically allocated PktBuf together with its data
+ * area and event struct.  Buffers flagged fixed_buf are not touched.
+ */
+static void pktbuf_free(PktBuf *buf)
+{
+       if (!buf->fixed_buf) {
+               log_debug("pktbuf_free(%p)", buf);
+
+               /* free(NULL) is a no-op, so partially built buffers are fine */
+               free(buf->buf);
+               free(buf->ev);
+               free(buf);
+       }
+}
+
+/*
+ * Allocate a growable PktBuf with start_len bytes of initial room.
+ *
+ * Returns NULL if any of the allocations fails; whatever was
+ * already allocated is released via pktbuf_free().
+ */
+PktBuf *pktbuf_dynamic(int start_len)
+{
+       PktBuf *pkt = zmalloc(sizeof(PktBuf));
+
+       log_debug("pktbuf_dynamic(%d): %p", start_len, pkt);
+       if (!pkt)
+               return NULL;
+
+       /* event struct first, data area only if that worked */
+       pkt->ev = zmalloc(sizeof(*pkt->ev));
+       if (pkt->ev)
+               pkt->buf = malloc(start_len);
+
+       if (!pkt->ev || !pkt->buf) {
+               pktbuf_free(pkt);
+               return NULL;
+       }
+
+       pkt->buf_len = start_len;
+       return pkt;
+}
+
+/*
+ * Set up a caller-provided PktBuf on top of a caller-provided memory
+ * area.  The struct is zeroed, pointed at data/len and flagged as
+ * fixed_buf.
+ */
+void pktbuf_static(PktBuf *buf, uint8 *data, int len)
+{
+       memset(buf, 0, sizeof(PktBuf));
+       buf->fixed_buf = 1;
+       buf->buf_len = len;
+       buf->buf = data;
+}
+
+/*
+ * Try to send the unsent part of the buffer with a single safe_send().
+ *
+ * Returns true only if the whole pending amount went out; a failed
+ * buffer, a send error or a partial write all give false.
+ */
+bool pktbuf_send_immidiate(PktBuf *buf, PgSocket *sk)
+{
+       int fd = sbuf_socket(&sk->sbuf);
+       int pending, sent;
+
+       if (buf->failed)
+               return false;
+
+       pending = buf->write_pos - buf->send_pos;
+       sent = safe_send(fd, buf->buf + buf->send_pos, pending, 0);
+       if (sent < 0) {
+               log_error("pktbuf_send_immidiate: %s", strerror(errno));
+       }
+       return sent == pending;
+}
+
+/*
+ * Event callback that pushes out the unsent part of a queued buffer.
+ *
+ * Sends what it can, then either re-arms a one-shot EV_WRITE event for
+ * the remainder or frees the buffer once send_pos reaches write_pos.
+ * The buffer is also freed on a hard send error.
+ */
+static void pktbuf_send_func(int fd, short flags, void *arg)
+{
+       PktBuf *buf = arg;
+       int amount, res;
+
+       log_debug("pktbuf_send_func(%d, %d, %p)", fd, (int)flags, buf);
+
+       if (buf->failed)
+               return;
+
+       amount = buf->write_pos - buf->send_pos;
+       res = safe_send(fd, buf->buf + buf->send_pos, amount, 0);
+       if (res < 0) {
+               /*
+                * The failure reason is in errno, not in the return value.
+                * A would-block socket is not an error: nothing was sent,
+                * so just wait for the next write event.
+                */
+               if (errno == EAGAIN) {
+                       res = 0;
+               } else {
+                       log_error("pktbuf_send_func: %s", strerror(errno));
+                       pktbuf_free(buf);
+                       return;
+               }
+       }
+       buf->send_pos += res;
+
+       if (buf->send_pos < buf->write_pos) {
+               /* data left over, ask to be called when fd is writable again */
+               event_set(buf->ev, fd, EV_WRITE, pktbuf_send_func, buf);
+               event_add(buf->ev, NULL);
+       } else
+               pktbuf_free(buf);
+}
+
+/*
+ * Hand a dynamic buffer over for event-driven sending.
+ *
+ * If building the buffer failed, a pooler error is sent to the socket
+ * instead and the buffer is freed right away.
+ */
+void pktbuf_send_queued(PktBuf *buf, PgSocket *sk)
+{
+       int fd = sbuf_socket(&sk->sbuf);
+
+       Assert(!buf->sending);
+       Assert(!buf->fixed_buf);
+
+       if (!buf->failed) {
+               /* first attempt happens immediately */
+               buf->sending = 1;
+               pktbuf_send_func(fd, EV_WRITE, buf);
+               return;
+       }
+
+       send_pooler_error(sk, true, "result prepare failed");
+       pktbuf_free(buf);
+}
+
+/*
+ * Make sure at least len more bytes can be written at write_pos.
+ *
+ * Dynamic buffers are grown by doubling; fixed buffers cannot grow.
+ * When room cannot be provided the buffer is flagged failed and
+ * left otherwise unchanged.
+ */
+static void make_room(PktBuf *buf, int len)
+{
+       int newlen = buf->buf_len;
+       int need = buf->write_pos + len;
+       void *ptr;
+
+       if (newlen >= need)
+               return;
+
+       if (buf->failed)
+               return;
+
+       if (buf->fixed_buf) {
+               buf->failed = 1;
+               return;
+       }
+
+       /*
+        * Doubling a zero-sized buffer would loop forever,
+        * so start from the required size in that case.
+        */
+       if (newlen <= 0)
+               newlen = need;
+
+       while (newlen < need)
+               newlen = newlen * 2;
+
+       log_debug("make_room(%p, %d): realloc newlen=%d",
+                 buf, len, newlen);
+       ptr = realloc(buf->buf, newlen);
+       if (!ptr) {
+               /* old data area is still valid, only mark the failure */
+               buf->failed = 1;
+       } else {
+               buf->buf = ptr;
+               buf->buf_len = newlen;
+       }
+}
+
+/* Append one byte; does nothing if the buffer is (or becomes) failed. */
+void pktbuf_put_char(PktBuf *buf, char val)
+{
+       make_room(buf, 1);
+       if (!buf->failed)
+               buf->buf[buf->write_pos++] = val;
+}
+
+/*
+ * Append a 16-bit value, most significant byte first.
+ * Does nothing if the buffer is (or becomes) failed.
+ */
+void pktbuf_put_uint16(PktBuf *buf, uint16 val)
+{
+       /* exactly two bytes are written, so reserve exactly two */
+       make_room(buf, 2);
+       if (buf->failed)
+               return;
+
+       buf->buf[buf->write_pos++] = (val >> 8) & 255;
+       buf->buf[buf->write_pos++] = val & 255;
+}
+
+/*
+ * Append a 32-bit value, most significant byte first.
+ * Does nothing if the buffer is (or becomes) failed.
+ */
+void pktbuf_put_uint32(PktBuf *buf, uint32 val)
+{
+       make_room(buf, 4);
+       if (buf->failed)
+               return;
+
+       buf->buf[buf->write_pos++] = (val >> 24) & 255;
+       buf->buf[buf->write_pos++] = (val >> 16) & 255;
+       buf->buf[buf->write_pos++] = (val >> 8) & 255;
+       buf->buf[buf->write_pos++] = val & 255;
+}
+
+/* Append a 64-bit value as two 32-bit halves, upper half first. */
+void pktbuf_put_uint64(PktBuf *buf, uint64 val)
+{
+       uint32 upper = val >> 32;
+       uint32 lower = (uint32)val;
+
+       pktbuf_put_uint32(buf, upper);
+       pktbuf_put_uint32(buf, lower);
+}
+
+/* Append len raw bytes; does nothing if the buffer is (or becomes) failed. */
+void pktbuf_put_bytes(PktBuf *buf, const void *data, int len)
+{
+       make_room(buf, len);
+       if (!buf->failed) {
+               memcpy(buf->buf + buf->write_pos, data, len);
+               buf->write_pos += len;
+       }
+}
+
+/* Append a C string including its terminating zero byte. */
+void pktbuf_put_string(PktBuf *buf, const char *str)
+{
+       int with_nul = strlen(str);
+
+       with_nul++;
+       pktbuf_put_bytes(buf, str, with_nul);
+}
+
+/*
+ * Write the packet header and remember where the length field is,
+ * so pktbuf_finish_packet() can fill it in later.
+ *
+ * type < 256:  new-style packet - type byte, then length.
+ * otherwise:   old-style packet - length, then 32-bit type code.
+ */
+void pktbuf_start_packet(PktBuf *buf, int type)
+{
+       int new_style = (type < 256);
+
+       if (buf->failed)
+               return;
+
+       if (new_style)
+               pktbuf_put_char(buf, type);
+
+       /* length placeholder */
+       buf->pktlen_pos = buf->write_pos;
+       pktbuf_put_uint32(buf, 0);
+
+       if (!new_style)
+               pktbuf_put_uint32(buf, type);
+}
+
+/*
+ * Fill in the length field remembered by pktbuf_start_packet().
+ * The length counts from the length field itself up to write_pos
+ * and is stored most significant byte first.
+ */
+void pktbuf_finish_packet(PktBuf *buf)
+{
+       uint8 *lenfield;
+       unsigned pktlen;
+
+       if (buf->failed)
+               return;
+
+       pktlen = buf->write_pos - buf->pktlen_pos;
+       lenfield = buf->buf + buf->pktlen_pos;
+       buf->pktlen_pos = 0;
+
+       lenfield[0] = (pktlen >> 24) & 255;
+       lenfield[1] = (pktlen >> 16) & 255;
+       lenfield[2] = (pktlen >> 8) & 255;
+       lenfield[3] = pktlen & 255;
+}
+
+/*
+ * Write one complete packet described by a format string.
+ *
+ * Each char in pktdesc consumes argument(s) from the varargs:
+ * c - char/byte
+ * h - uint16
+ * i - uint32
+ * q - uint64
+ * s - Cstring
+ * b - bytes (pointer, then int length)
+ *
+ * Any other char is a fatal error.
+ */
+void pktbuf_write_generic(PktBuf *buf, int type, const char *pktdesc, ...)
+{
+       va_list ap;
+       const char *fld;
+       uint8 *bin;
+       int len;
+
+       pktbuf_start_packet(buf, type);
+
+       va_start(ap, pktdesc);
+       for (fld = pktdesc; *fld; fld++) {
+               switch (*fld) {
+               case 'c':
+                       pktbuf_put_char(buf, va_arg(ap, int));
+                       break;
+               case 'h':
+                       pktbuf_put_uint16(buf, va_arg(ap, int));
+                       break;
+               case 'i':
+                       pktbuf_put_uint32(buf, va_arg(ap, int));
+                       break;
+               case 'q':
+                       pktbuf_put_uint64(buf, va_arg(ap, uint64));
+                       break;
+               case 's':
+                       pktbuf_put_string(buf, va_arg(ap, char *));
+                       break;
+               case 'b':
+                       /* pointer comes first, length second */
+                       bin = va_arg(ap, uint8 *);
+                       len = va_arg(ap, int);
+                       pktbuf_put_bytes(buf, bin, len);
+                       break;
+               default:
+                       fatal("bad pktdesc: %s", pktdesc);
+               }
+       }
+       va_end(ap);
+
+       /* header length can be filled in now */
+       pktbuf_finish_packet(buf);
+}
+
+
+/*
+ * Write a RowDescription ('T') packet.
+ *
+ * tupdesc has one char per column, varargs give one column name
+ * (char *) per column.  tupdesc keys:
+ * 'i' - int4
+ * 'q' - int8
+ * 's' - string
+ * 'T' - usec_t to date (described as text)
+ */
+void pktbuf_write_RowDescription(PktBuf *buf, const char *tupdesc, ...)
+{
+       va_list ap;
+       const char *colname;
+       int col, ncol = strlen(tupdesc);
+
+       log_noise("write RowDescription");
+
+       pktbuf_start_packet(buf, 'T');
+
+       pktbuf_put_uint16(buf, ncol);
+
+       va_start(ap, tupdesc);
+       for (col = 0; col < ncol; col++) {
+               colname = va_arg(ap, char *);
+
+               /* Fields: name, reloid, colnr, oid, typsize, typmod, fmt */
+               pktbuf_put_string(buf, colname);
+               pktbuf_put_uint32(buf, 0);
+               pktbuf_put_uint16(buf, 0);
+
+               switch (tupdesc[col]) {
+               case 's':
+               case 'T':
+                       /* both are shown to the client as text */
+                       pktbuf_put_uint32(buf, TEXTOID);
+                       pktbuf_put_uint16(buf, -1);
+                       break;
+               case 'i':
+                       pktbuf_put_uint32(buf, INT4OID);
+                       pktbuf_put_uint16(buf, 4);
+                       break;
+               case 'q':
+                       pktbuf_put_uint32(buf, INT8OID);
+                       pktbuf_put_uint16(buf, 8);
+                       break;
+               default:
+                       fatal("bad tupdesc");
+               }
+
+               pktbuf_put_uint32(buf, 0);
+               pktbuf_put_uint16(buf, 0);
+       }
+       va_end(ap);
+
+       /* header length can be filled in now */
+       pktbuf_finish_packet(buf);
+}
+
+/*
+ * Write a DataRow ('D') packet.
+ *
+ * tupdesc has one char per column, varargs give one value per column.
+ * tupdesc keys:
+ * 'i' - int4
+ * 'q' - int8
+ * 's' - string (NULL pointer is sent as SQL NULL)
+ * 'T' - usec_t to date
+ *
+ * All values are sent in text form.  The terminating zero byte is
+ * sent and counted in the column length.
+ * NOTE(review): the protocol normally excludes the terminator from
+ * column data - confirm what the consumers of these rows expect.
+ */
+void pktbuf_write_DataRow(PktBuf *buf, const char *tupdesc, ...)
+{
+       char numbuf[32];
+       const char *val = NULL;
+       int col, len, ncol = strlen(tupdesc);
+       va_list ap;
+
+       pktbuf_start_packet(buf, 'D');
+       pktbuf_put_uint16(buf, ncol);
+
+       va_start(ap, tupdesc);
+       for (col = 0; col < ncol; col++) {
+               /* convert the argument to its text form */
+               switch (tupdesc[col]) {
+               case 'i':
+                       sprintf(numbuf, "%d", va_arg(ap, int));
+                       val = numbuf;
+                       break;
+               case 'q':
+                       sprintf(numbuf, "%llu", (unsigned long long)va_arg(ap, uint64));
+                       val = numbuf;
+                       break;
+               case 's':
+                       val = va_arg(ap, char *);
+                       break;
+               case 'T': {
+                       usec_t when = va_arg(ap, usec_t);
+                       val = format_date(when);
+                       break;
+               }
+               default:
+                       fatal("bad tupdesc: %s", tupdesc);
+               }
+
+               if (!val) {
+                       /* NULL */
+                       pktbuf_put_uint32(buf, -1);
+                       continue;
+               }
+
+               len = strlen(val);
+               pktbuf_put_uint32(buf, len + 1);
+               pktbuf_put_string(buf, val);
+       }
+       va_end(ap);
+
+       pktbuf_finish_packet(buf);
+}
+
diff --git a/src/pktbuf.h b/src/pktbuf.h
new file mode 100644 (file)
index 0000000..057f119
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Safe & easy creation of PostgreSQL packets.
+ */
+
+/*
+ * Packet buffer.  Created either by pktbuf_dynamic() (heap allocated,
+ * growable) or by pktbuf_static() (caller-provided memory, fixed size).
+ */
+typedef struct PktBuf PktBuf;
+struct PktBuf {
+       uint8 *buf;             /* data area */
+       int buf_len;            /* size of the data area */
+       int write_pos;          /* offset where the next byte is written */
+       int pktlen_pos;         /* offset of the length field of the packet being built */
+
+       int send_pos;           /* offset of the first byte not yet sent */
+       struct event *ev;       /* event for queued sending; allocated by pktbuf_dynamic() only */
+
+       unsigned failed:1;      /* room could not be made; further writes are ignored */
+       unsigned sending:1;     /* set by pktbuf_send_queued() */
+       unsigned fixed_buf:1;   /* set by pktbuf_static(); never grown or freed */
+};
+
+/*
+ * pktbuf creation
+ */
+PktBuf *pktbuf_dynamic(int start_len);
+void pktbuf_static(PktBuf *buf, uint8 *data, int len);
+
+/*
+ * sending
+ */
+bool pktbuf_send_immidiate(PktBuf *buf, PgSocket *sk);
+void pktbuf_send_queued(PktBuf *buf, PgSocket *sk);
+
+/*
+ * low-level ops
+ */
+void pktbuf_start_packet(PktBuf *buf, int type);
+void pktbuf_put_char(PktBuf *buf, char val);
+void pktbuf_put_uint16(PktBuf *buf, uint16 val);
+void pktbuf_put_uint32(PktBuf *buf, uint32 val);
+void pktbuf_put_uint64(PktBuf *buf, uint64 val);
+void pktbuf_put_string(PktBuf *buf, const char *str);
+void pktbuf_put_bytes(PktBuf *buf, const void *data, int len);
+void pktbuf_finish_packet(PktBuf *buf);
+#define pktbuf_written(buf) ((buf)->write_pos)
+
+
+/*
+ * Packet writing
+ */
+void pktbuf_write_generic(PktBuf *buf, int type, const char *fmt, ...);
+void pktbuf_write_RowDescription(PktBuf *buf, const char *tupdesc, ...);
+void pktbuf_write_DataRow(PktBuf *buf, const char *tupdesc, ...);
+
+/*
+ * Shortcuts for actual packets.
+ */
+#define pktbuf_write_ParameterStatus(buf, key, val) \
+       pktbuf_write_generic(buf, 'S', "ss", key, val)
+
+#define pktbuf_write_AuthenticationOk(buf) \
+       pktbuf_write_generic(buf, 'R', "i", 0)
+
+#define pktbuf_write_ReadyForQuery(buf) \
+       pktbuf_write_generic(buf, 'Z', "c", 'I')
+
+#define pktbuf_write_CommandComplete(buf, desc) \
+       pktbuf_write_generic(buf, 'C', "s", desc)
+
+#define pktbuf_write_BackendKeyData(buf, key) \
+       pktbuf_write_generic(buf, 'K', "b", key, 8)
+
+#define pktbuf_write_CancelRequest(buf, key) \
+       pktbuf_write_generic(buf, PKT_CANCEL, "b", key, 8)
+
+#define pktbuf_write_StartupMessage(buf, user, parms, parms_len) \
+       pktbuf_write_generic(buf, PKT_STARTUP, "bsss", parms, parms_len, "user", user, "")
+
+#define pktbuf_write_PasswordMessage(buf, psw) \
+       pktbuf_write_generic(buf, 'p', "s", psw)
+
+/*
+ * Shortcut for creating DataRow in memory.
+ *
+ * Writes one DataRow packet into dst (dstlen bytes) via a temporary
+ * static PktBuf.  reslen receives the number of bytes written,
+ * or -1 if the packet did not fit.  args are passed on to
+ * pktbuf_write_DataRow() (tupdesc, then the column values).
+ */
+
+#define BUILD_DataRow(reslen, dst, dstlen, args...) do { \
+       PktBuf _buf; \
+       pktbuf_static(&_buf, dst, dstlen); \
+       pktbuf_write_DataRow(&_buf, ## args); \
+       reslen = _buf.failed ? -1 : _buf.write_pos; \
+} while (0)
+
+/*
+ * Shortcuts for immediate send of one packet.
+ *
+ * SEND_wrap builds the packet with pktfn in a buflen-byte buffer on
+ * the stack and sends it with pktbuf_send_immidiate(); res receives
+ * that function's result.  args are passed on to pktfn after the
+ * buffer pointer.
+ */
+
+#define SEND_wrap(buflen, pktfn, res, sk, args...) do { \
+       uint8 _data[buflen]; PktBuf _buf; \
+       pktbuf_static(&_buf, _data, sizeof(_data)); \
+       pktfn(&_buf, ## args); \
+       res = pktbuf_send_immidiate(&_buf, sk); \
+} while (0)
+
+#define SEND_RowDescription(res, sk, args...) \
+       SEND_wrap(512, pktbuf_write_RowDescription, res, sk, ## args)
+
+#define SEND_generic(res, sk, args...) \
+       SEND_wrap(512, pktbuf_write_generic, res, sk, ## args)
+
+#define SEND_ReadyForQuery(res, sk) \
+       SEND_wrap(8, pktbuf_write_ReadyForQuery, res, sk)
+
+#define SEND_CancelRequest(res, sk, key) \
+       SEND_wrap(16, pktbuf_write_CancelRequest, res, sk, key)
+
+#define SEND_PasswordMessage(res, sk, psw) \
+       SEND_wrap(512, pktbuf_write_PasswordMessage, res, sk, psw)
+
+
+
diff --git a/src/pooler.c b/src/pooler.c
new file mode 100644 (file)
index 0000000..2f26e49
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Handling of pooler listening sockets
+ */
+
+#include "bouncer.h"
+
+/* listening sockets; 0 is used as "not open" throughout this file */
+static int fd_net = 0;
+static int fd_unix = 0;
+/* accept events for the two listening sockets */
+static struct event ev_net;
+static struct event ev_unix;
+/* set by suspend_pooler(), cleared by resume_pooler() */
+static int suspended = 0;
+
+/* timer used to retry accepting after accept() has failed */
+static struct event ev_err;
+static struct timeval err_timeout = {5, 0};
+
+/*
+ * atexit() handler: remove the unix socket file.
+ * Skipped when no unix socket dir is configured or the pooler
+ * is suspended.
+ */
+static void cleanup_unix_socket(void)
+{
+       char path[256];
+
+       if (cf_unix_socket_dir && !suspended) {
+               snprintf(path, sizeof(path), "%s/.s.PGSQL.%d",
+                               cf_unix_socket_dir, cf_listen_port);
+               unlink(path);
+       }
+}
+
+/* Report the current listening sockets (0 means not open). */
+void get_pooler_fds(int *p_net, int *p_unix)
+{
+       *p_net = fd_net;
+       *p_unix = fd_unix;
+}
+
+/*
+ * Create, bind and listen on the unix socket
+ * "<socket_dir>/.s.PGSQL.<listen_port>".
+ *
+ * Refuses to start (fatal) if a "<path>.lock" file exists.
+ * Any failing system call is fatal as well.  Returns the
+ * listening socket.
+ */
+static int create_unix_socket(const char *socket_dir, int listen_port)
+{
+       struct sockaddr_un un;
+       int res, sock;
+       char lockfile[256];
+       struct stat st;
+
+       /* fill sockaddr struct */
+       memset(&un, 0, sizeof(un));
+       un.sun_family = AF_UNIX;
+       snprintf(un.sun_path, sizeof(un.sun_path),
+               "%s/.s.PGSQL.%d", socket_dir, listen_port);
+
+       /* check for lockfile */
+       snprintf(lockfile, sizeof(lockfile), "%s.lock", un.sun_path);
+       res = lstat(lockfile, &st);
+       if (res == 0)
+               fatal("unix port %d is in use", listen_port);
+
+       /* expect old bouncer gone */
+       unlink(un.sun_path);
+
+       /* create socket */
+       sock = socket(PF_UNIX, SOCK_STREAM, 0);
+       if (sock < 0)
+               fatal_perror("socket");
+
+       /* bind it */
+       res = bind(sock, (const struct sockaddr *)&un, sizeof(un));
+       if (res < 0)
+               fatal_perror("bind");
+
+       /* remove socket on shutdown */
+       atexit(cleanup_unix_socket);
+
+       /* set common options */
+       tune_socket(sock, true);
+
+       /* finally, accept connections */
+       res = listen(sock, 100);
+       if (res < 0)
+               fatal_perror("listen");
+
+       /* make the socket file accessible to all local users */
+       res = chmod(un.sun_path, 0777);
+       if (res < 0)
+               fatal_perror("chmod");
+
+       log_info("listening on unix:%s", un.sun_path);
+
+       return sock;
+}
+
+static int create_net_socket(const char *listen_addr, int listen_port)
+{
+       int sock;
+       struct sockaddr_in sa;
+       int res;
+       int val;
+
+       /* create socket */
+       sock = socket(AF_INET, SOCK_STREAM, 0);
+       if (sock < 0)
+               fatal_perror("socket");
+
+       /* parse address */
+       memset(&sa, 0, sizeof(sa));
+       sa.sin_family = AF_INET;
+       sa.sin_port = htons(cf_listen_port);
+       if (strcmp(listen_addr, "*") == 0) {
+               sa.sin_addr.s_addr = htonl(INADDR_ANY);
+       } else {
+               sa.sin_addr.s_addr = inet_addr(listen_addr);
+               if (sa.sin_addr.s_addr == INADDR_NONE)
+                       fatal("cannot parse addr: '%s'", listen_addr);
+       }
+
+       /* relaxed binding */
+       val = 1;
+       res = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+       if (res < 0)
+               fatal_perror("setsockopt");
+
+       /* bind to address */
+       res = bind(sock, (struct sockaddr *)&sa, sizeof(sa));
+       if (res < 0)
+               fatal_perror("bind");
+
+       /* set common options */
+       tune_socket(sock, false);
+
+#ifdef TCP_DEFER_ACCEPT
+       /*
+        * Notify pooler only when also data is arrived.
+        *
+        * optval specifies how long after connection attempt to wait for data.
+        *
+        * Related to tcp_synack_retries sysctl, default 5 (corresponds 180 secs).
+        */
+       if (cf_tcp_defer_accept > 0) {
+               val = cf_tcp_defer_accept;
+               res = setsockopt(sock, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, sizeof(val));
+               if (res < 0)
+                       fatal_perror("setsockopt TCP_DEFER_ACCEPT");
+       }
+#endif
+
+       /* finally, accept connections */
+       res = listen(sock, 100);
+       if (res < 0)
+               fatal_perror("listen");
+
+       log_info("listening on %s:%d", cf_listen_addr, cf_listen_port);
+
+       return sock;
+}
+
+/* Timer callback armed after an accept() failure: start accepting again. */
+static void err_wait_func(int sock, short flags, void *arg)
+{
+       resume_pooler();
+}
+
+/*
+ * got new connection, associate it with client struct
+ *
+ * Event callback for both listening sockets.  is_unix is the event
+ * argument: non-NULL for the unix socket, NULL for the TCP socket.
+ */
+static void
+pool_accept(int sock, short flags, void *is_unix)
+{
+       int fd;
+       union {
+               struct sockaddr_in in;
+               struct sockaddr_un un;
+               struct sockaddr sa;
+       } addr;
+       socklen_t len = sizeof(addr);
+
+       /* get fd */
+       fd = accept(sock, &addr.sa, &len);
+       if (fd < 0) {
+               /*
+                * probably fd limit, pointless to try often
+                * wait a bit, hope that admin resolves somehow
+                *
+                * accepting is suspended and resumed by err_wait_func
+                * after err_timeout
+                */
+               log_error("accept() failed: %s", strerror(errno));
+               suspend_pooler();
+               evtimer_set(&ev_err, err_wait_func, NULL);
+               evtimer_add(&ev_err, &err_timeout);
+               return;
+       }
+
+       log_noise("new fd from accept=%d", fd);
+       if (is_unix) {
+               log_debug("P: new unix client");
+               {
+                       /* peer uid is only logged here, not checked */
+                       uid_t uid;
+                       log_noise("getuid(): %d", (int)getuid());
+                       if (get_unix_peer_uid(fd, &uid))
+                               log_noise("unix peer uid: %d", (int)uid);
+                       else
+                               log_noise("unix peer uid failed");
+               }
+               /* no address is passed on for unix clients */
+               accept_client(fd, NULL, true);
+       } else {
+               log_debug("P: new tcp client");
+               accept_client(fd, &addr.in, false);
+       }
+}
+
+/*
+ * Adopt an already existing listening socket instead of creating one.
+ * The socket is tuned and remembered as the unix or TCP listener.
+ * Always returns true.
+ */
+bool
+use_pooler_socket(int sock, bool is_unix)
+{
+       int *slot = is_unix ? &fd_unix : &fd_net;
+
+       tune_socket(sock, is_unix);
+       *slot = sock;
+
+       return true;
+}
+
+/*
+ * Stop accepting new connections: mark the pooler suspended and
+ * remove the accept events of whichever listening sockets are open.
+ * The sockets themselves stay open.
+ */
+void
+suspend_pooler(void)
+{
+       suspended = 1;
+
+       if (fd_net)
+               event_del(&ev_net);
+       if (fd_unix)
+               event_del(&ev_unix);
+}
+
+/*
+ * Start (or restart) accepting connections: clear the suspended flag
+ * and register persistent read events for the open listening sockets.
+ */
+void
+resume_pooler(void)
+{
+       suspended = 0;
+
+       if (fd_unix) {
+               /* any non-NULL arg tells pool_accept() this is the unix socket */
+               event_set(&ev_unix, fd_unix, EV_READ | EV_PERSIST, pool_accept, "1");
+               event_add(&ev_unix, NULL);
+       }
+
+       if (fd_net) {
+               /* NULL arg means TCP socket */
+               event_set(&ev_net, fd_net, EV_READ | EV_PERSIST, pool_accept, NULL);
+               event_add(&ev_net, NULL);
+       }
+}
+
+/*
+ * listen on socket - should happen after all other initializations
+ *
+ * Sockets that are already set (e.g. via use_pooler_socket()) are
+ * kept; missing ones are created if configured.  It is fatal to end
+ * up with no listening socket at all.
+ */
+void
+pooler_setup(void)
+{
+       if (cf_listen_addr && !fd_net)
+               fd_net = create_net_socket(cf_listen_addr, cf_listen_port);
+
+       if (cf_unix_socket_dir && !fd_unix)
+               fd_unix = create_unix_socket(cf_unix_socket_dir, cf_listen_port);
+
+       if (!fd_net && !fd_unix)
+               fatal("nowhere to listen on");
+
+       resume_pooler();
+}
+
diff --git a/src/pooler.h b/src/pooler.h
new file mode 100644 (file)
index 0000000..a4536ca
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+void pooler_setup(void);
+bool use_pooler_socket(int fd, bool is_unix);
+void resume_pooler(void);
+void suspend_pooler(void);
+void get_pooler_fds(int *p_net, int *p_unix);
+
diff --git a/src/proto.c b/src/proto.c
new file mode 100644 (file)
index 0000000..aef48b0
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Pieces that need to have detailed info about protocol.
+ */
+
+#include "bouncer.h"
+
+/*
+ * parse protocol header from MBuf
+ */
+
+/*
+ * Parse a packet header from the buffer.
+ *
+ * On success the packet type and length are stored via pkt_type_p and
+ * pkt_len_p and true is returned.  Returns false, leaving both outputs
+ * untouched, if the header is incomplete or not recognized.
+ */
+bool get_header(MBuf *pkt, unsigned *pkt_type_p, unsigned *pkt_len_p)
+{
+       unsigned first, magic, special_len, special_type;
+
+       if (mbuf_avail(pkt) < 5) {
+               log_noise("get_header: less then 5 bytes available");
+               return false;
+       }
+
+       first = mbuf_get_char(pkt);
+       if (first != 0) {
+               /* ordinary packet: reported length is the 32-bit length field plus one */
+               *pkt_type_p = first;
+               *pkt_len_p = mbuf_get_uint32(pkt) + 1;
+               return true;
+       }
+
+       /* special packet: the second byte has to be zero as well */
+       if (mbuf_get_char(pkt) != 0) {
+               log_noise("get_header: unknown special pkt");
+               return false;
+       }
+
+       /* dont tolerate partial pkt: 16-bit length and 32-bit code follow */
+       if (mbuf_avail(pkt) < 6) {
+               log_noise("get_header: less that 6 bytes for special pkt");
+               return false;
+       }
+       special_len = mbuf_get_uint16(pkt);
+       magic = mbuf_get_uint32(pkt);
+
+       switch (magic) {
+       case 80877102:
+               special_type = PKT_CANCEL;
+               break;
+       case 80877103:
+               special_type = PKT_SSLREQ;
+               break;
+       default:
+               /* startup packet: high half 3, low half 0 or 1 */
+               if ((magic >> 16) != 3 || (magic & 0xFFFF) >= 2) {
+                       log_noise("get_header: unknown special pkt: len=%u code=%u", special_len, magic);
+                       return false;
+               }
+               special_type = PKT_STARTUP;
+               break;
+       }
+
+       *pkt_type_p = special_type;
+       *pkt_len_p = special_len;
+       return true;
+}
+
+
+/*
+ * Log the message and send it to the client as an error ('E') packet
+ * with severity ERROR and code 08P01.
+ *
+ * If send_ready is set, a ReadyForQuery packet is appended.
+ * Returns the result of the immediate send.
+ */
+bool send_pooler_error(PgSocket *client, bool send_ready, const char *msg)
+{
+       PktBuf errpkt;
+       uint8 storage[512];
+
+       slog_error(client, "Pooler Error: %s", msg);
+
+       pktbuf_static(&errpkt, storage, sizeof(storage));
+       pktbuf_write_generic(&errpkt, 'E', "cscscsc",
+                            'S', "ERROR",
+                            'C', "08P01",
+                            'M', msg,
+                            0);
+
+       if (send_ready)
+               pktbuf_write_ReadyForQuery(&errpkt);
+
+       return pktbuf_send_immidiate(&errpkt, client);
+}
+
+/*
+ * Parse server error message and log it.
+ *
+ * Walks the (field type, string) pairs of the packet, remembering the
+ * 'S' and 'M' fields.  If either is missing, the message is reported
+ * as corrupt.
+ */
+void log_server_error(const char *note, MBuf *pkt)
+{
+       const char *severity = NULL;
+       const char *message = NULL;
+
+       while (mbuf_avail(pkt)) {
+               const char *text;
+               int field = mbuf_get_char(pkt);
+
+               /* zero byte terminates the field list */
+               if (field == 0)
+                       break;
+
+               text = mbuf_get_string(pkt);
+               if (text == NULL)
+                       break;
+
+               switch (field) {
+               case 'S':
+                       severity = text;
+                       break;
+               case 'M':
+                       message = text;
+                       break;
+               default:
+                       /* other fields are ignored */
+                       break;
+               }
+       }
+
+       if (severity && message)
+               log_error("%s: %s: %s", note, severity, message);
+       else
+               log_error("%s: corrupt error message", note);
+}
+
+
+/*
+ * Preparation of welcome message for client connection.
+ */
+
+/*
+ * Add another server parameter packet to the cached welcome message.
+ *
+ * The cache lives in the database entry of the server's pool
+ * (db->welcome_msg, db->welcome_msg_len).  Once the cache is marked
+ * ready, further packets are ignored and true is returned.
+ *
+ * Returns false if the packet is incomplete or cannot be parsed.
+ */
+bool add_welcome_parameter(PgSocket *server,
+                          unsigned pkt_type, unsigned pkt_len, MBuf *pkt)
+{
+       PgDatabase *db = server->pool->db;
+       PktBuf msg;
+       const char *key, *val;
+
+       if (db->welcome_msg_ready)
+               return true;
+
+       /* incomplete startup msg from server? */
+       /* NOTE(review): pkt_len is unsigned, pkt_len < 5 would wrap around here - confirm callers never pass that */
+       if (pkt_len - 5 > mbuf_avail(pkt))
+               return false;
+
+       /* write into the still unused tail of the cache */
+       pktbuf_static(&msg, db->welcome_msg + db->welcome_msg_len,
+                     sizeof(db->welcome_msg) - db->welcome_msg_len);
+
+       /* empty cache: the message starts with AuthenticationOk */
+       if (db->welcome_msg_len == 0)
+               pktbuf_write_AuthenticationOk(&msg);
+
+       key = mbuf_get_string(pkt);
+       val = mbuf_get_string(pkt);
+       if (!key || !val) {
+               /* welcome_msg_len was not advanced, so nothing written above is kept */
+               log_error("broken ParameterStatus packet");
+               return false;
+       }
+       log_debug("S: param: %s = %s", key, val);
+       pktbuf_write_ParameterStatus(&msg, key, val);
+       /* only now do the bytes written above become part of the cache */
+       db->welcome_msg_len += pktbuf_written(&msg);
+
+       return true;
+}
+
+/*
+ * All parameters processed: mark the cached welcome message of the
+ * server's database as ready.  Does nothing if it already is.
+ */
+void finish_welcome_msg(PgSocket *server)
+{
+       PgDatabase *db = server->pool->db;
+
+       if (!db->welcome_msg_ready)
+               db->welcome_msg_ready = 1;
+}
+
+/*
+ * Send the cached welcome message to a client, followed by a freshly
+ * generated cancel key and ReadyForQuery.
+ *
+ * Returns false if the welcome message of the database is not ready yet.
+ * A failed send is only logged; true is returned in that case too.
+ */
+bool welcome_client(PgSocket *client)
+{
+       PgDatabase *db = client->pool->db;
+       uint8 storage[1024];
+       PktBuf reply;
+
+       log_noise("P: welcome_client");
+       if (!db->welcome_msg_ready)
+               return false;
+
+       /* start with the cached part */
+       pktbuf_static(&reply, storage, sizeof(storage));
+       pktbuf_put_bytes(&reply, db->welcome_msg, db->welcome_msg_len);
+
+       /* give each client its own cancel key */
+       get_random_bytes(client->cancel_key, 8);
+       pktbuf_write_BackendKeyData(&reply, client->cancel_key);
+
+       pktbuf_write_ReadyForQuery(&reply);
+
+       /* send all together */
+       if (!pktbuf_send_immidiate(&reply, client))
+               log_warning("unhandled failure to send welcome_msg");
+
+       return true;
+}
+
+/*
+ * Password authentication for server
+ */
+
+/*
+ * Actual packet send: pass the already encoded password to the server.
+ * The server is disconnected if the send does not succeed.
+ */
+static void send_password(PgSocket *server, const char *enc_psw)
+{
+       bool res;
+       /* NOTE(review): SEND_PasswordMessage is defined elsewhere; it appears to store the send result in res */
+       SEND_PasswordMessage(res, server, enc_psw);
+       if (!res)
+               disconnect_server(server, true,
+                                 "partial send unhandled in send_password");
+}
+
+/* answer a cleartext password request with the stored password as-is */
+static void login_clear_psw(PgSocket *server)
+{
+       PgUser *user = server->pool->user;
+
+       log_debug("P: send clear password");
+       send_password(server, user->passwd);
+}
+
+/*
+ * Answer a crypt() password request.
+ *
+ * salt points at the 2 salt bytes taken from the server's request.
+ * They are raw packet bytes, not a NUL-terminated string.
+ */
+static void login_crypt_psw(PgSocket *server, const uint8 *salt)
+{
+       char saltbuf[3];
+       const char *enc;
+       PgUser *user = server->pool->user;
+
+       log_debug("P: send crypt password");
+
+       /*
+        * Build a proper C string from the salt.  strncpy(.., 2) does not
+        * write a terminator when both salt bytes are non-zero, which would
+        * leave saltbuf[2] uninitialized.
+        */
+       memcpy(saltbuf, salt, 2);
+       saltbuf[2] = 0;
+
+       enc = pg_crypt(user->passwd, saltbuf);
+       send_password(server, enc);
+}
+
+
+/*
+ * Answer an MD5 password request.
+ *
+ * If the stored password is not already in MD5 form, it is first hashed
+ * with the user name as salt.  The result, minus its first 3 chars, is
+ * then hashed again with the 4-byte salt from the server and sent.
+ */
+static void login_md5_psw(PgSocket *server, const uint8 *salt)
+{
+       char txt[MD5_PASSWD_LEN + 1], *src;
+       PgUser *user = server->pool->user;
+
+       log_debug("P: send md5 password");
+       if (!isMD5(user->passwd)) {
+               pg_md5_encrypt(user->passwd, user->name, strlen(user->name), txt);
+               /* skip the first 3 chars (presumably the "md5" prefix) */
+               src = txt + 3;
+       } else
+               src = user->passwd + 3;
+       /* NOTE(review): src may point into txt, which is also the output buffer - relies on pg_md5_encrypt() tolerating that */
+       pg_md5_encrypt(src, (char *)salt, 4, txt);
+
+       send_password(server, txt);
+}
+
+/*
+ * Answer server authentication request.
+ *
+ * pkt is positioned after the packet header; pkt_len is the full packet
+ * length as reported by get_header().  Returns false if the packet is
+ * too short or not fully available, true otherwise (also for auth
+ * methods that are only logged as unsupported/unknown).
+ */
+bool answer_authreq(PgSocket *server,
+                   unsigned pkt_type, unsigned pkt_len,
+                   MBuf *pkt)
+{
+       unsigned cmd;
+       const uint8 *salt;
+
+       /* header + 32-bit auth code is the minimum */
+       if (pkt_len < 5 + 4)
+               return false;
+       if (mbuf_avail(pkt) < pkt_len - 5)
+               return false;
+
+       cmd = mbuf_get_uint32(pkt);
+       switch (cmd) {
+       case 0:
+               log_debug("S: auth ok");
+               break;
+       case 3:
+               log_debug("S: req cleartext password");
+               login_clear_psw(server);
+               break;
+       case 4:
+               /* 2 salt bytes follow */
+               if (pkt_len < 5 + 4 + 2)
+                       return false;
+               log_debug("S: req crypt psw");
+               salt = mbuf_get_bytes(pkt, 2);
+               login_crypt_psw(server, salt);
+               break;
+       case 5:
+               /* 4 salt bytes follow */
+               if (pkt_len < 5 + 4 + 4)
+                       return false;
+               log_debug("S: req md5-crypted psw");
+               salt = mbuf_get_bytes(pkt, 4);
+               login_md5_psw(server, salt);
+               break;
+       case 2: /* kerberos */
+       case 6: /* scm something */
+               log_error("unsupported auth method: %u", cmd);
+               /* dont fall into default, that would log a second, wrong message */
+               break;
+       default:
+               log_error("unknown auth method: %u", cmd);
+               break;
+       }
+       return true;
+}
+
+/*
+ * Build a StartupMessage from the pool's user name and the database's
+ * prepared startup parameters and send it to the server immediately.
+ * Returns the result of the send.
+ */
+bool send_startup_packet(PgSocket *server)
+{
+       uint8 storage[512];
+       PktBuf startup;
+       PgDatabase *db = server->pool->db;
+       const char *login = server->pool->user->name;
+
+       pktbuf_static(&startup, storage, sizeof(storage));
+       pktbuf_write_StartupMessage(&startup, login,
+                                   db->startup_params, db->startup_params_len);
+
+       return pktbuf_send_immidiate(&startup, server);
+}
+
+/*
+ * Parse one row of text values into the variadic output pointers.
+ *
+ * tupdesc has one char per column:
+ *   'i' - int *     (value converted with atoi)
+ *   'q' - uint64 *  (value converted with atoll)
+ *   's' - char **   (pointer into the packet, NULL for a NULL column)
+ *
+ * A NULL column stores 0 for 'i' and 'q'.  Calls fatal() if the column
+ * count does not match tupdesc, on an unknown tupdesc char, or if data
+ * is left over after the last column.  Returns the number of columns.
+ */
+int scan_text_result(MBuf *pkt, const char *tupdesc, ...)
+{
+       char *val = NULL;
+       int len;
+       unsigned ncol, i;
+       va_list ap;
+
+       ncol = mbuf_get_uint16(pkt);
+       if (ncol != strlen(tupdesc))
+               fatal("different number of cols");
+
+       va_start(ap, tupdesc);
+       for (i = 0; i < ncol; i++) {
+               /* negative length means NULL column, no value bytes follow */
+               len = mbuf_get_uint32(pkt);
+               if (len < 0)
+                       val = NULL;
+               else
+                       val = (char *)mbuf_get_bytes(pkt, len);
+
+               /*
+                * NOTE(review): value bytes are handed to atoi()/atoll() as
+                * C strings; nothing here terminates them - confirm the
+                * packet layout guarantees a stop byte.
+                */
+               if (tupdesc[i] == 'i') {
+                       int *dst_p = va_arg(ap, int *);
+                       /* atoi(NULL) would crash on NULL column */
+                       *dst_p = val ? atoi(val) : 0;
+               } else if (tupdesc[i] == 'q') {
+                       uint64 *dst_p = va_arg(ap, uint64 *);
+                       /* same guard for atoll() */
+                       *dst_p = val ? atoll(val) : 0;
+               } else if (tupdesc[i] == 's') {
+                       char **dst_p = va_arg(ap, char **);
+                       *dst_p = val;
+               } else
+                       fatal("bad tupdesc: %s", tupdesc);
+       }
+       va_end(ap);
+
+       if (mbuf_avail(pkt))
+               fatal("scan_text_result: unparsed data");
+
+       return ncol;
+}
+
diff --git a/src/proto.h b/src/proto.h
new file mode 100644 (file)
index 0000000..eeaf343
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* parse packet header; false if it is incomplete or not recognized */
+bool get_header(MBuf *pkt, unsigned *pkt_type_p, unsigned *pkt_len_p);
+
+/* log error and send it to client, optionally followed by ReadyForQuery */
+bool send_pooler_error(PgSocket *client, bool send_ready, const char *msg);
+/* log severity and message fields of a server error packet */
+void log_server_error(const char *note, MBuf *pkt);
+
+/* append a server parameter packet to the per-database welcome message */
+bool add_welcome_parameter(PgSocket *server, unsigned pkt_type, unsigned pkt_len, MBuf *pkt);
+/* mark the welcome message as complete */
+void finish_welcome_msg(PgSocket *server);
+/* send welcome message plus fresh cancel key; false if message is not ready yet */
+bool welcome_client(PgSocket *client);
+
+/* answer server authentication request; false if packet is too short */
+bool answer_authreq(PgSocket *server, unsigned pkt_type, unsigned pkt_len, MBuf *pkt);
+
+/* send StartupMessage to server */
+bool send_startup_packet(PgSocket *server);
+
+/* parse a row of text values into varargs; tupdesc chars: 'i', 'q', 's' */
+int scan_text_result(MBuf *pkt, const char *tupdesc, ...);
+
diff --git a/src/sbuf.c b/src/sbuf.c
new file mode 100644 (file)
index 0000000..f64c75d
--- /dev/null
@@ -0,0 +1,500 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stream buffer
+ *
+ * The task is to copy data from one socket to another
+ * efficiently, while allowing callbacks to look
+ * at packet headers.
+ */
+
+#include "bouncer.h"
+
+/*
+ * if less than this amount of data is pending, then
+ * prefer to merge it with next recv()
+ */
+#define SMALL_PKT      16
+
+/* declare static stuff */
+static void sbuf_queue_send(SBuf *sbuf);
+static bool sbuf_send_pending(SBuf *sbuf);
+static bool sbuf_process_pending(SBuf *sbuf);
+static void sbuf_connect_cb(int sock, short flags, void *arg);
+static void sbuf_recv_cb(int sock, short flags, void *arg);
+static void sbuf_send_cb(int sock, short flags, void *arg);
+static void sbuf_try_resync(SBuf *sbuf);
+static void sbuf_wait_for_data(SBuf *sbuf);
+
+/*
+ * Call proto callback with proper MBuf.
+ *
+ * The MBuf covers the bytes between pkt_pos and recv_pos, that is the
+ * received data that has not been walked yet.
+ *
+ * If callback returns true it used one of sbuf_prepare_* on sbuf,
+ * and processing can continue.
+ *
+ * If it returned false it used sbuf_pause(), sbuf_close() or simply
+ * wants to wait for next event loop (eg. too few data available).
+ * Caller should not touch sbuf in that case and just return to libevent.
+ */
+static inline bool sbuf_call_proto(SBuf *sbuf, int event)
+{
+       MBuf mbuf;
+       uint8 *pos = sbuf->buf + sbuf->pkt_pos;
+       int avail = sbuf->recv_pos - sbuf->pkt_pos;
+
+       Assert(avail >= 0);
+       Assert(pos + avail <= sbuf->buf + cf_sbuf_len);
+       /* a read event without any data makes no sense */
+       Assert(event != SBUF_EV_READ || avail > 0);
+
+       mbuf_init(&mbuf, pos, avail);
+       return sbuf->proto_handler(sbuf, event, &mbuf, sbuf->arg);
+}
+
+/* (re)arm the persistent read event, so sbuf_recv_cb runs on new data */
+static void sbuf_wait_for_data(SBuf *sbuf)
+{
+       struct event *ev = &sbuf->ev;
+
+       event_set(ev, sbuf->sock, EV_READ | EV_PERSIST, sbuf_recv_cb, sbuf);
+       event_add(ev, NULL);
+}
+
+/*
+ * Zero the SBuf header and remember the proto handler together with
+ * the argument that will be passed to it.
+ */
+void sbuf_init(SBuf *sbuf, sbuf_proto_cb_t proto_fn, void *arg)
+{
+       memset(sbuf, 0, sizeof(*sbuf));
+
+       sbuf->proto_handler = proto_fn;
+       sbuf->arg = arg;
+}
+
+/*
+ * Attach a socket that came from accept() to a fresh SBuf.
+ *
+ * Unless cf_reboot is set, the SBuf starts waiting for data right away.
+ */
+void sbuf_accept(SBuf *sbuf, int sock, bool is_unix)
+{
+       /* SBuf must be unused */
+       Assert(sbuf->pkt_pos == 0);
+       Assert(sbuf->recv_pos == 0);
+       Assert(sbuf->send_pos == 0);
+
+       tune_socket(sock, is_unix);
+       sbuf->sock = sock;
+       sbuf->is_unix = is_unix;
+
+       if (cf_reboot)
+               return;
+
+       sbuf_wait_for_data(sbuf);
+
+       /* socket should already have some data (linux only) */
+       if (!is_unix && cf_tcp_defer_accept)
+               sbuf_recv_cb(sbuf->sock, EV_READ, sbuf);
+}
+
+/*
+ * Need to connect() to get a socket.
+ *
+ * Creates a socket for addr (unix or TCP) and launches the connection.
+ * The outcome is reported through the proto handler: SBUF_EV_CONNECT_OK
+ * or SBUF_EV_CONNECT_FAILED, either immediately or later from
+ * sbuf_connect_cb.  timeout_sec limits the wait for a connection that
+ * is still in progress.
+ */
+void sbuf_connect(SBuf *sbuf, const PgAddr *addr, int timeout_sec)
+{
+       int res, sock, domain, err;
+       struct sockaddr_in sa_in;
+       struct sockaddr_un sa_un;
+       struct sockaddr *sa;
+       socklen_t len;
+       struct timeval timeout;
+
+       /* prepare sockaddr */
+       if (addr->is_unix) {
+               sa = (void*)&sa_un;
+               len = sizeof(sa_un);
+               memset(sa, 0, len);
+               sa_un.sun_family = AF_UNIX;
+               snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),
+                        "%s/.s.PGSQL.%d", cf_unix_socket_dir, addr->port);
+               domain = AF_UNIX;
+       } else {
+               sa = (void*)&sa_in;
+               len = sizeof(sa_in);
+               memset(sa, 0, len);
+               sa_in.sin_family = AF_INET;
+               sa_in.sin_addr = addr->ip_addr;
+               sa_in.sin_port = htons(addr->port);
+               domain = AF_INET;
+       }
+
+       /*
+        * common stuff
+        */
+       sock = socket(domain, SOCK_STREAM, 0);
+       if (sock < 0) {
+               /* probably fd limit, try to survive */
+               log_error("sbuf_connect: socket() failed: %s", strerror(errno));
+               sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED);
+               return;
+       }
+
+       tune_socket(sock, addr->is_unix);
+
+       sbuf->is_unix = addr->is_unix;
+       sbuf->sock = sock;
+
+       timeout.tv_sec = timeout_sec;
+       timeout.tv_usec = 0;
+
+       /* launch connection */
+       res = connect(sock, sa, len);
+       /* save errno now, the logging call below may overwrite it */
+       err = errno;
+       log_noise("connect(%d)=%d", sock, res);
+       if (res == 0) {
+               /* unix socket gives connection immediately */
+               sbuf_connect_cb(sock, EV_WRITE, sbuf);
+       } else if (res < 0 && err == EINPROGRESS) {
+               /* tcp socket needs waiting */
+               event_set(&sbuf->ev, sock, EV_WRITE, sbuf_connect_cb, sbuf);
+               event_add(&sbuf->ev, &timeout);
+       } else {
+               /* failure */
+               log_warning("connect failed: res=%d/err=%s", res, strerror(err));
+               close(sock);
+               sbuf->sock = 0;
+               sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED);
+       }
+}
+
+/*
+ * Stop waiting for data on this socket by removing its event.
+ * Must not be used while the SBuf waits for a send to complete.
+ */
+void sbuf_pause(SBuf *sbuf)
+{
+       Assert(!sbuf->wait_send);
+
+       event_del(&sbuf->ev);
+}
+
+/*
+ * Resume from pause: start waiting for data again and immediately
+ * run the receive path once.
+ */
+void sbuf_continue(SBuf *sbuf)
+{
+       sbuf_wait_for_data(sbuf);
+
+       /* there is some data already received */
+       sbuf_recv_cb(sbuf->sock, EV_READ, sbuf);
+}
+
+/*
+ * Resume from pause, but let an external callback handle the socket
+ * instead of the SBuf receive path.
+ *
+ * The callback is registered for persistent read events and gets the
+ * arg that was given to sbuf_init.
+ */
+void sbuf_continue_with_callback(SBuf *sbuf, sbuf_libevent_cb user_cb)
+{
+       struct event *ev = &sbuf->ev;
+       void *cb_arg = sbuf->arg;
+
+       event_set(ev, sbuf->sock, EV_READ | EV_PERSIST, user_cb, cb_arg);
+       event_add(ev, NULL);
+}
+
+/*
+ * Socket cleanup & close.
+ *
+ * Removes the event and closes the socket if there is one, then resets
+ * all positions and flags.  Handler and arg values are kept.
+ */
+void sbuf_close(SBuf *sbuf)
+{
+       if (sbuf->sock > 0) {
+               event_del(&sbuf->ev);
+               safe_close(sbuf->sock);
+       }
+       sbuf->sock = 0;
+       sbuf->dst = NULL;
+
+       /* buffer positions */
+       sbuf->recv_pos = 0;
+       sbuf->pkt_pos = 0;
+       sbuf->pkt_remain = 0;
+       sbuf->send_pos = 0;
+       sbuf->send_remain = 0;
+
+       /* flags */
+       sbuf->wait_send = 0;
+       sbuf->pkt_skip = 0;
+       sbuf->pkt_flush = 0;
+}
+
+/*
+ * proto_fn tells to forward the next 'amount' bytes to 'dst'.
+ * With 'flush' set the data is sent as soon as it is walked.
+ */
+void sbuf_prepare_send(SBuf *sbuf, SBuf *dst, unsigned amount, bool flush)
+{
+       /* previous packet must be fully walked */
+       Assert(sbuf->pkt_remain == 0);
+       Assert(sbuf->pkt_skip == 0 || sbuf->send_remain == 0);
+       Assert(!sbuf->pkt_flush || sbuf->send_remain == 0);
+       Assert(amount > 0);
+
+       sbuf->dst = dst;
+       sbuf->pkt_remain = amount;
+       sbuf->pkt_flush = flush;
+       sbuf->pkt_skip = 0;
+}
+
+/*
+ * proto_fn tells to skip the next 'amount' bytes: they are walked
+ * but not forwarded anywhere.
+ */
+void sbuf_prepare_skip(SBuf *sbuf, int amount)
+{
+       /* previous packet must be fully walked */
+       Assert(sbuf->pkt_remain == 0);
+       Assert(sbuf->pkt_skip == 0 || sbuf->send_remain == 0);
+       Assert(!sbuf->pkt_flush || sbuf->send_remain == 0);
+       Assert(amount > 0);
+
+       sbuf->dst = NULL;
+       sbuf->pkt_remain = amount;
+       sbuf->pkt_flush = 0;
+       sbuf->pkt_skip = 1;
+}
+
+/*
+ * libevent EV_WRITE: called when dest socket is writable again.
+ *
+ * Clears the wait flag and retries the pending work.  If that went
+ * through, go back to waiting for new data.
+ */
+static void sbuf_send_cb(int sock, short flags, void *arg)
+{
+       SBuf *sbuf = arg;
+
+       sbuf->wait_send = 0;
+       if (sbuf_process_pending(sbuf))
+               sbuf_wait_for_data(sbuf);
+}
+
+/*
+ * Socket is full, wait until it is writable again.
+ *
+ * The SBuf's single event is re-used: the current event is removed
+ * and replaced by a one-shot write event on the destination socket.
+ */
+static void sbuf_queue_send(SBuf *sbuf)
+{
+       sbuf->wait_send = 1;
+       event_del(&sbuf->ev);
+       event_set(&sbuf->ev, sbuf->dst->sock, EV_WRITE, sbuf_send_cb, sbuf);
+       event_add(&sbuf->ev, NULL);
+}
+
+/*
+ * There is data in buffer to be sent.  Returns true if processing
+ * can continue, false if the send was queued or failed.
+ *
+ * Does not look at pkt_pos/remain fields, expects them to be merged to send_*
+ */
+static bool sbuf_send_pending(SBuf *sbuf)
+{
+       int sent, chunk;
+
+       for (;;) {
+               /* how much data is available for sending */
+               chunk = sbuf->recv_pos - sbuf->send_pos;
+               if (chunk > sbuf->send_remain)
+                       chunk = sbuf->send_remain;
+               if (chunk == 0)
+                       return true;
+
+               /* actually send it */
+               sent = safe_send(sbuf->dst->sock, sbuf->buf + sbuf->send_pos, chunk, 0);
+               if (sent < 0)
+                       break;
+
+               sbuf->send_remain -= sent;
+               sbuf->send_pos += sent;
+               if (sent >= chunk)
+                       return true;
+
+               /*
+                * Partial send.  Instead of queueing immediately,
+                * try again until send() reports EAGAIN.
+                */
+       }
+
+       if (errno == EAGAIN) {
+               /* destination is full, continue when it is writable */
+               sbuf_queue_send(sbuf);
+               return false;
+       }
+
+       sbuf_call_proto(sbuf, SBUF_EV_SEND_FAILED);
+       return false;
+}
+
+/*
+ * Process as much data as possible.
+ *
+ * Walks the received bytes packet by packet, asking the proto handler
+ * at each packet start what to do, and merges forwarded bytes into the
+ * send window.  Returns false if the proto handler or a send stopped
+ * processing, otherwise the result of the final send.
+ */
+static bool sbuf_process_pending(SBuf *sbuf)
+{
+       int avail;
+       /* evaluated once at entry: was the buffer completely filled? */
+       bool full = sbuf->recv_pos == cf_sbuf_len;
+       bool res;
+
+       while (1) {
+               Assert(sbuf->recv_pos >= sbuf->pkt_pos);
+
+               /*
+                * Enough for now?
+                *
+                * The (avail <= SMALL_PKT) check is to avoid partial pkts.
+                * As SBuf should not assume knowledge about packets,
+                * the check is not done in !full case.  Packet handler can
+                * then still notify about partial packet by returning false.
+                */
+               avail = sbuf->recv_pos - sbuf->pkt_pos;
+               if (avail == 0 || (full && avail <= SMALL_PKT))
+                       break;
+
+               /* handle proto if start of packet */
+               if (sbuf->pkt_remain == 0) { /* start of new block */
+                       res = sbuf_call_proto(sbuf, SBUF_EV_READ);
+                       if (!res)
+                               return false;
+                       /* handler must have used sbuf_prepare_send/skip */
+                       Assert(sbuf->pkt_remain > 0);
+               }
+
+               /* walk pkt, merge sends */
+               if (avail > sbuf->pkt_remain)
+                       avail = sbuf->pkt_remain;
+               if (!sbuf->pkt_skip) {
+                       /* nothing scheduled yet: send window starts at this packet */
+                       if (sbuf->send_remain == 0)
+                               sbuf->send_pos = sbuf->pkt_pos;
+                       sbuf->send_remain += avail;
+               }
+               sbuf->pkt_remain -= avail;
+               sbuf->pkt_pos += avail;
+
+               /* send data */
+               /* on skip, this pushes out what was scheduled before the skipped bytes */
+               if (sbuf->pkt_skip || sbuf->pkt_flush) {
+                       res = sbuf_send_pending(sbuf);
+                       if (!res)
+                               return false;
+               }
+       }
+
+       /* send whatever got merged but not flushed yet */
+       return sbuf_send_pending(sbuf);
+}
+
+/*
+ * Reposition at buffer start again.
+ *
+ * If no live data is left, all positions are simply reset.  If at most
+ * SMALL_PKT bytes are left, they are moved to the buffer start.  Larger
+ * remainders are left where they are.
+ */
+static void sbuf_try_resync(SBuf *sbuf)
+{
+       int avail;
+
+       /* nothing walked yet, nothing to do */
+       if (sbuf->pkt_pos == 0)
+               return;
+
+       /* live data starts at send_pos if a send is pending, else at pkt_pos */
+       if (sbuf->send_remain > 0)
+               avail = sbuf->recv_pos - sbuf->send_pos;
+       else
+               avail = sbuf->recv_pos - sbuf->pkt_pos;
+
+       if (avail == 0) {
+               sbuf->recv_pos = sbuf->pkt_pos = sbuf->send_pos = 0;
+       } else if (avail <= SMALL_PKT) {
+               if (sbuf->send_remain > 0) {
+                       /* keep unsent data; pkt_pos keeps its distance from send_pos */
+                       memmove(sbuf->buf, sbuf->buf + sbuf->send_pos, avail);
+                       sbuf->pkt_pos -= sbuf->send_pos;
+                       sbuf->send_pos = 0;
+                       sbuf->recv_pos = avail;
+               } else {
+                       /* keep only the not yet walked data */
+                       memmove(sbuf->buf, sbuf->buf + sbuf->pkt_pos, avail);
+                       sbuf->send_pos = 0;
+                       sbuf->pkt_pos = 0;
+                       sbuf->recv_pos = avail;
+               }
+       }
+}
+
+/*
+ * Actually ask kernel for more data: up to len bytes are received
+ * behind recv_pos.
+ *
+ * Returns true if processing can continue (also when the socket simply
+ * had no data).  On EOF or error the proto handler gets
+ * SBUF_EV_RECV_FAILED and false is returned.
+ */
+static bool sbuf_actual_recv(SBuf *sbuf, int len)
+{
+       int got = safe_recv(sbuf->sock, sbuf->buf + sbuf->recv_pos, len, 0);
+
+       if (got > 0) {
+               sbuf->recv_pos += got;
+               return true;
+       }
+
+       /* we tried too much, socket is empty.  act as zero bytes was read */
+       if (got < 0 && errno == EAGAIN)
+               return true;
+
+       /* eof from socket (got == 0) or some other error */
+       sbuf_call_proto(sbuf, SBUF_EV_RECV_FAILED);
+       return false;
+}
+
+/*
+ * Callback for libevent EV_READ.
+ *
+ * Makes room in the buffer, receives if there is enough free space and
+ * processes what is pending.  Repeats as long as processing succeeds
+ * and the buffer ends up full.
+ */
+static void sbuf_recv_cb(int sock, short flags, void *arg)
+{
+       SBuf *sbuf = arg;
+       int room, ok;
+
+       /* reading should be disabled when waiting */
+       Assert(sbuf->wait_send == 0);
+
+       do {
+               /* make room in buffer */
+               sbuf_try_resync(sbuf);
+
+               /*
+                * FIXME: When called from sbuf_continue(), there is already
+                * data waiting.  Thus there will be unnecessary recv().
+                */
+               room = cf_sbuf_len - sbuf->recv_pos;
+               if (room > SMALL_PKT) {
+                       if (!sbuf_actual_recv(sbuf, room))
+                               return;
+               }
+
+               /* now handle it */
+               ok = sbuf_process_pending(sbuf);
+
+               /* if the buffer is full, there can be more data available */
+       } while (ok && sbuf->recv_pos == cf_sbuf_len);
+}
+
+/*
+ * Check if there is any error pending on socket.
+ * Returns true only if SO_ERROR could be read and is zero.
+ */
+static bool sbuf_after_connect_check(SBuf *sbuf)
+{
+       int so_error = 0;
+       socklen_t so_len = sizeof(so_error);
+
+       if (getsockopt(sbuf->sock, SOL_SOCKET, SO_ERROR, (void*)&so_error, &so_len) < 0) {
+               log_error("sbuf_after_connect_check: getsockopt: %s",
+                               strerror(errno));
+               return false;
+       }
+
+       if (so_error == 0)
+               return true;
+
+       log_error("sbuf_after_connect_check: pending error: %s",
+                       strerror(so_error));
+       return false;
+}
+
+/*
+ * Callback for libevent EV_WRITE when connecting.
+ *
+ * Without EV_WRITE in flags (EV_TIMEOUT) or with an error pending on
+ * the socket the proto handler gets SBUF_EV_CONNECT_FAILED.  Otherwise
+ * it gets SBUF_EV_CONNECT_OK, and if it returns true the SBuf starts
+ * waiting for data.
+ */
+static void sbuf_connect_cb(int sock, short flags, void *arg)
+{
+       SBuf *sbuf = arg;
+       bool connected;
+
+       /* the socket is checked only if it actually became writable */
+       connected = (flags & EV_WRITE) && sbuf_after_connect_check(sbuf);
+       if (!connected) {
+               sbuf_call_proto(sbuf, SBUF_EV_CONNECT_FAILED);
+               return;
+       }
+
+       if (sbuf_call_proto(sbuf, SBUF_EV_CONNECT_OK))
+               sbuf_wait_for_data(sbuf);
+}
+
+/*
+ * Send some data directly to the SBuf's own socket.
+ *
+ * Returns true only if all len bytes were sent.  Errors and partial
+ * sends are logged.  Returns false right away if there is no socket.
+ */
+bool sbuf_answer(SBuf *sbuf, const void *buf, int len)
+{
+       int sent;
+
+       if (sbuf->sock <= 0)
+               return false;
+
+       sent = safe_send(sbuf->sock, buf, len, 0);
+       if (sent >= 0) {
+               if (sent != len)
+                       log_error("sbuf_answer: partial send: len=%d sent=%d", len, sent);
+       } else {
+               log_error("sbuf_answer: error sending: %s", strerror(errno));
+       }
+
+       return sent == len;
+}
+
diff --git a/src/sbuf.h b/src/sbuf.h
new file mode 100644 (file)
index 0000000..56d553b
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* event types passed to the protocol callback */
+typedef enum {
+       SBUF_EV_READ,           /* received data is available in the MBuf */
+       SBUF_EV_RECV_FAILED,    /* recv gave EOF or an error */
+       SBUF_EV_SEND_FAILED,    /* send to the destination socket failed */
+       SBUF_EV_CONNECT_FAILED, /* socket creation or connect failed or timed out */
+       SBUF_EV_CONNECT_OK      /* connection is established */
+} SBufEvent;
+
+typedef struct SBuf SBuf;
+
+/* callback should return true if it used one of sbuf_prepare_* on sbuf,
+   false if it used sbuf_pause(), sbuf_close() or simply wants to wait for
+   next event loop (eg. too few data available). */
+typedef bool (*sbuf_proto_cb_t)(SBuf *sbuf,
+                               SBufEvent evtype,
+                               MBuf *mbuf,
+                               void *arg);
+
+/* for some reason, libevent has no typedef for callback */
+typedef void (*sbuf_libevent_cb)(int, short, void *);
+
+/*
+ * Stream buffer state for one socket.  The pos fields are offsets
+ * into buf.
+ */
+struct SBuf {
+       /* libevent handle */
+       struct event ev;
+
+       /* protocol callback function */
+       sbuf_proto_cb_t proto_handler;
+       /* argument given to sbuf_init, passed on to proto_handler */
+       void *arg;
+
+       /* fd for this socket, 0 when there is none */
+       int sock;
+
+       /* dest SBuf for current packet */
+       SBuf *dst;
+
+       unsigned recv_pos;      /* end of received data */
+       unsigned pkt_pos;       /* start of data that is not walked yet */
+       unsigned pkt_remain;    /* bytes of current packet still to be walked */
+       unsigned send_pos;      /* start of data scheduled for sending */
+       unsigned send_remain;   /* amount of data scheduled for sending */
+
+       unsigned wait_send:1;   /* waiting for dst socket to become writable */
+       unsigned pkt_skip:1;    /* current packet is skipped, not forwarded */
+       unsigned pkt_flush:1;   /* send current packet as soon as it is walked */
+       unsigned is_unix:1;     /* socket is a unix socket */
+
+       /* data buffer, cf_sbuf_len bytes - presumably allocated together with the struct */
+       uint8 buf[0];
+};
+
+/* fd of the SBuf's socket */
+#define sbuf_socket(sbuf) ((sbuf)->sock)
+
+/* zero the SBuf and set the protocol callback with its argument */
+void sbuf_init(SBuf *sbuf, sbuf_proto_cb_t proto_fn, void *arg);
+/* attach a socket that came from accept() */
+void sbuf_accept(SBuf *sbuf, int read_sock, bool is_unix);
+/* create a socket and launch connection to addr; result comes via callback */
+void sbuf_connect(SBuf *sbuf, const PgAddr *addr, int timeout_sec);
+
+/* stop waiting for data */
+void sbuf_pause(SBuf *sbuf);
+/* resume from pause */
+void sbuf_continue(SBuf *sbuf);
+/* close socket and reset state, callback and its argument are kept */
+void sbuf_close(SBuf *sbuf);
+
+/* proto_fn can use those functions to order behaviour */
+void sbuf_prepare_send(SBuf *sbuf, SBuf *dst, unsigned amount, bool flush);
+void sbuf_prepare_skip(SBuf *sbuf, int amount);
+
+/* send data directly to the SBuf's own socket; true only if all was sent */
+bool sbuf_answer(SBuf *sbuf, const void *buf, int len);
+
+/* resume from pause with an external libevent callback on the socket */
+void sbuf_continue_with_callback(SBuf *sbuf, sbuf_libevent_cb cb);
+
+/*
+ * True if no packet is partially walked and nothing lies between
+ * send_pos and recv_pos.
+ */
+static inline bool sbuf_empty(SBuf *sbuf)
+{
+       if (sbuf->send_pos != sbuf->recv_pos)
+               return false;
+       return sbuf->pkt_remain == 0;
+}
+
diff --git a/src/server.c b/src/server.c
new file mode 100644 (file)
index 0000000..c89341e
--- /dev/null
@@ -0,0 +1,277 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Handling of server connections
+ */
+
+#include "bouncer.h"
+
+/*
+ * process packets on server auth phase
+ *
+ * Parses one packet header from pkt and dispatches on the packet type.
+ * The whole packet must already be present in pkt; a short packet is
+ * treated as an error and the server is disconnected.
+ *
+ * Returns true when the packet was handled, in which case it has been
+ * scheduled for skipping with sbuf_prepare_skip().  Returns false when
+ * the server was disconnected or when a helper (answer_authreq,
+ * add_welcome_parameter) returned false.
+ */
+static bool handle_server_startup(PgSocket *server, MBuf *pkt)
+{
+       unsigned pkt_type;
+       unsigned pkt_len;
+       SBuf *sbuf = &server->sbuf;
+       bool res = false;
+
+       if (!get_header(pkt, &pkt_type, &pkt_len)) {
+               disconnect_server(server, true, "bad pkt in login phase");
+               return false;
+       }
+
+       if (pkt_len > mbuf_avail(pkt) + 5) {    /* NOTE(review): the +5 presumably covers header bytes already consumed by get_header() -- confirm */
+               disconnect_server(server, true, "partial pkt in login phase");
+               return false;
+       }
+
+       log_noise("S: pkt '%c', len=%d", pkt_type, pkt_len);
+
+       switch (pkt_type) {
+       default:
+               slog_error(server, "unknown pkt from server: '%c'", pkt_type);
+               disconnect_server(server, true, "unknown pkt from server");
+               break;  /* res stays false */
+       case 'E':               /* ErrorResponse */
+               log_server_error("S: login failed", pkt);
+               disconnect_server(server, true, "login failed");
+               break;  /* res stays false */
+
+       /* packets that need closer look */
+       case 'R':               /* AuthenticationXXX */
+               log_debug("calling login_answer");
+               res = answer_authreq(server, pkt_type, pkt_len, pkt);
+               break;
+       case 'S':               /* ParameterStatus */
+               res = add_welcome_parameter(server, pkt_type, pkt_len, pkt);
+               break;
+       case 'Z':               /* ReadyForQuery */
+               /* login ok */
+               log_debug("server login ok, start accepting queries");
+               server->ready = 1;
+
+               finish_welcome_msg(server);
+               release_server(server);
+
+               /* let the takeover process handle it */
+               if (server->pool->admin)
+                       takeover_login(server);
+               res = true;
+               break;
+
+       /* ignorable packets */
+       case 'K':               /* BackendKeyData */
+               if (mbuf_avail(pkt) >= 8)       /* remember the cancel key only when all 8 bytes are present */
+                       memcpy(server->cancel_key, mbuf_get_bytes(pkt, 8), 8);
+               res = true;
+               break;
+       case 'N':               /* NoticeResponse */
+               slog_noise(server, "skipping pkt: %c", pkt_type);
+               res = true;
+               break;
+       }
+
+       if (res)        /* handled here, nothing is forwarded during login */
+               sbuf_prepare_skip(sbuf, pkt_len);
+
+       return res;
+}
+
+/*
+ * process packets on logged in connection
+ *
+ * Reads one packet header and decides, by packet type, whether the
+ * packet is forwarded to the linked client (server->link) or skipped
+ * when there is no client, and whether it has to be flushed at once.
+ * The switch relies on deliberate fall-through: each group of cases
+ * adds its own handling and then continues into the groups below it.
+ *
+ * Returns true when the packet was scheduled with sbuf_prepare_send()
+ * or sbuf_prepare_skip(); false when the server was disconnected or
+ * when a ReadyForQuery packet has no body byte available yet.
+ */
+static bool handle_server_work(PgSocket *server, MBuf *pkt)
+{
+       unsigned pkt_type;
+       unsigned pkt_len;
+       bool flush = 0;
+       bool ready = 0;
+       char state;
+       SBuf *sbuf = &server->sbuf;
+       PgSocket *client = server->link;
+
+       Assert(!server->pool->admin);
+
+       if (!get_header(pkt, &pkt_type, &pkt_len)) {
+               disconnect_server(server, true, "bad pkt header");
+               return false;
+       }
+       slog_noise(server, "pkt='%c' len=%d", pkt_type, pkt_len);
+
+       switch (pkt_type) {
+       default:
+               slog_error(server, "unknown pkt: '%c'", pkt_type);
+               disconnect_server(server, true, "unknown pkt");
+               return false;
+       
+       /* pooling decisions will be based on this packet */
+       case 'Z':               /* ReadyForQuery */
+
+               /* if partial pkt, wait */
+               if (mbuf_avail(pkt) == 0)
+                       return false;
+               state = mbuf_get_char(pkt);
+
+               /* set ready only if no tx */
+               if (state == 'I')
+                       ready = 1;
+               else if (cf_pool_mode == POOL_STMT) {
+                       disconnect_server(server, true,
+                                         "Long transactions not allowed");
+                       return false;
+               }
+
+       case 'E':               /* ErrorResponse */
+       case 'N':               /* NoticeResponse */
+
+               /* above packets (ReadyForQuery falls through to here) need to be sent immediately */
+               flush = 1;
+
+       /*
+        * chat packets, but server (and thus pooler)
+        * is allowed to buffer them until Sync or Flush
+        * is sent by client.
+        *
+        * The cases above fall through to here on purpose.
+        */
+       case '2':               /* BindComplete */
+       case '3':               /* CloseComplete */
+       case 'c':               /* CopyDone(F/B) */
+       case 'f':               /* CopyFail(F/B) */
+       case 'I':               /* EmptyQueryResponse == CommandComplete */
+       case 'V':               /* FunctionCallResponse */
+       case 'n':               /* NoData */
+       case 'G':               /* CopyInResponse */
+       case 'H':               /* CopyOutResponse */
+       case '1':               /* ParseComplete */
+       case 'A':               /* NotificationResponse */
+       case 's':               /* PortalSuspended */
+       case 'C':               /* CommandComplete */
+
+               /* check if client wanted immediate response */
+               if (client && client->flush_req) {
+                       flush = 1;
+                       client->flush_req = 0;
+               }
+
+       /* data packets, there will be more coming (all cases above fall through to here) */
+       case 'd':               /* CopyData(F/B) */
+       case 'D':               /* DataRow */
+       case 't':               /* ParameterDescription */
+       case 'S':               /* ParameterStatus */
+       case 'T':               /* RowDescription */
+
+               if (client) {
+                       sbuf_prepare_send(sbuf, &client->sbuf, pkt_len, flush);
+               } else {
+                       if (server->state != SV_TESTED)
+                               log_warning("got packet '%c' from server"
+                                               " when not linked", pkt_type);
+                       sbuf_prepare_skip(sbuf, pkt_len);
+               }
+               break;
+       }
+       server->ready = ready;  /* 1 only after a ReadyForQuery with state 'I' */
+
+       /* update stats */
+       server->pool->stats.server_bytes += pkt_len;
+       if (server->ready && client) {
+               usec_t total;
+               Assert(client->query_start != 0);
+               
+               total = get_time_usec() - client->query_start;
+               client->query_start = 0;
+               server->pool->stats.query_time += total;
+               slog_debug(client, "query time: %d us", (int)total);
+       }
+
+       if (ready && (     cf_pool_mode  != POOL_SESSION
+                       || server->state == SV_TESTED))
+               release_server(server); /* in session pooling only a server in SV_TESTED state is released here */
+
+       return true;
+}
+
+/*
+ * Connection to the server is established: either spend it on a queued
+ * cancel request or start the login handshake.
+ *
+ * Returns true only when the startup packet was sent.  In every other
+ * case the server has been disconnected and false is returned.
+ */
+static bool handle_connect(PgSocket *server)
+{
+       PgPool *pool = server->pool;
+       bool sent;
+
+       if (statlist_empty(&pool->cancel_req_list)) {
+               /* nothing to cancel, proceed with login */
+               sent = send_startup_packet(server);
+               if (!sent)
+                       disconnect_server(server, false, "startup pkt failed");
+               return sent;
+       }
+
+       /* a cancel request is waiting, send it over this connection */
+       slog_debug(server, "use it for pending cancel req");
+       forward_cancel_request(server);
+
+       /* notify disconnect_server() that connect did not fail */
+       server->ready = 1;
+       disconnect_server(server, false, "sent cancel req");
+       return false;
+}
+
+/*
+ * SBuf protocol callback for server sockets.
+ *
+ * sbuf   - buffer the event came from (not used directly here)
+ * evtype - what happened on the socket
+ * pkt    - received data, looked at only for SBUF_EV_READ
+ * arg    - the PgSocket of the server
+ *
+ * Returns the result of the packet or connect handler; false for all
+ * failure events and when fewer than 5 bytes are available.
+ *
+ * NOTE(review): on SBUF_EV_SEND_FAILED server->link is passed to
+ * disconnect_client() without a NULL check -- confirm it is always set
+ * when this event can fire.
+ */
+bool server_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg)
+{
+       PgSocket *server = arg;
+       bool handled = false;
+
+       Assert(is_server_socket(server));
+       Assert(server->state != SV_FREE);
+
+       switch (evtype) {
+       case SBUF_EV_CONNECT_OK:
+               log_debug("S: connect ok");
+               Assert(server->state == SV_LOGIN);
+               server->request_time = get_cached_time();
+               handled = handle_connect(server);
+               break;
+       case SBUF_EV_CONNECT_FAILED:
+               Assert(server->state == SV_LOGIN);
+               disconnect_server(server, false, "connect failed");
+               break;
+       case SBUF_EV_READ:
+               /* need at least the packet header before doing anything */
+               if (mbuf_avail(pkt) < 5) {
+                       log_noise("S: got partial header, trying to wait a bit");
+                       return false;
+               }
+
+               server->request_time = get_cached_time();
+               switch (server->state) {
+               case SV_IDLE:
+               case SV_ACTIVE:
+               case SV_USED:
+               case SV_TESTED:
+                       handled = handle_server_work(server, pkt);
+                       break;
+               case SV_LOGIN:
+                       handled = handle_server_startup(server, pkt);
+                       break;
+               default:
+                       fatal("server_proto: server in bad state: %d", server->state);
+               }
+               break;
+       case SBUF_EV_SEND_FAILED:
+               disconnect_client(server->link, false, "unexpected eof");
+               break;
+       case SBUF_EV_RECV_FAILED:
+               disconnect_server(server, false, "server conn crashed?");
+               break;
+       }
+       return handled;
+}
+
diff --git a/src/server.h b/src/server.h
new file mode 100644 (file)
index 0000000..f99aa40
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+bool server_proto(SBuf *sbuf, SBufEvent evtype, MBuf *pkt, void *arg);
+
diff --git a/src/stats.c b/src/stats.c
new file mode 100644 (file)
index 0000000..c891ff2
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bouncer.h"
+
+static struct event ev_stats;
+static usec_t old_stamp, new_stamp;
+
+/* Set all four counters of *stat back to zero. */
+static void reset_stats(PgStats *stat)
+{
+       stat->request_count = 0;
+       stat->query_time = 0;
+       stat->client_bytes = 0;
+       stat->server_bytes = 0;
+}
+
+/* Accumulate the four counters of *stat into *total. */
+static void stat_add(PgStats *total, PgStats *stat)
+{
+       total->request_count += stat->request_count;
+       total->query_time += stat->query_time;
+       total->client_bytes += stat->client_bytes;
+       total->server_bytes += stat->server_bytes;
+}
+
+/*
+ * Fill *avg with rates derived from the difference between *cur and *old.
+ *
+ * Request and byte counters become per-second rates over the time elapsed
+ * since old_stamp; query_time becomes the mean time per request.  *avg is
+ * left all-zero when no time has elapsed, and query_time stays zero when
+ * no request was counted.
+ *
+ * NOTE(review): if usec_t is unsigned, the "no time elapsed" test can only
+ * ever catch an interval of exactly zero -- confirm the type.
+ */
+static void calc_average(PgStats *avg, PgStats *cur, PgStats *old)
+{
+       usec_t elapsed = get_cached_time() - old_stamp;
+       uint64 nreq;
+
+       reset_stats(avg);
+
+       if (elapsed > 0) {
+               avg->request_count = USEC * (cur->request_count - old->request_count) / elapsed;
+               avg->client_bytes = USEC * (cur->client_bytes - old->client_bytes) / elapsed;
+               avg->server_bytes = USEC * (cur->server_bytes - old->server_bytes) / elapsed;
+
+               nreq = cur->request_count - old->request_count;
+               if (nreq != 0)
+                       avg->query_time = (cur->query_time - old->query_time) / nreq;
+       }
+}
+
+/*
+ * Append one DataRow for dbname to buf: the four totals from *stat
+ * followed by the four averages computed against *old.
+ */
+static void write_stats(PktBuf *buf, PgStats *stat, PgStats *old, char *dbname)
+{
+       PgStats rate;
+
+       calc_average(&rate, stat, old);
+
+       pktbuf_write_DataRow(buf, "sqqqqqqqq", dbname,
+                            /* totals */
+                            stat->request_count, stat->client_bytes,
+                            stat->server_bytes, stat->query_time,
+                            /* averages */
+                            rate.request_count, rate.client_bytes,
+                            rate.server_bytes, rate.query_time);
+}
+
+/*
+ * Answer an admin stats query: one row per database with the summed
+ * totals and averages of all its pools.
+ *
+ * client    - admin connection that receives the result
+ * pool_list - pools to walk; consecutive pools with the same db are
+ *             summed into one row
+ *
+ * Always returns true; an allocation failure is reported to the client
+ * with admin_error().
+ *
+ * NOTE(review): a database whose pools are not adjacent in pool_list gets
+ * more than one row -- confirm the list is kept grouped by database.
+ *
+ * The former locals rows, st_total and old_total were only ever written,
+ * never read, so they have been dropped; the output is unchanged.
+ */
+bool admin_database_stats(PgSocket *client, StatList *pool_list)
+{
+       PgPool *pool;
+       List *item;
+       PgDatabase *cur_db = NULL;
+       PgStats st_db, old_db;
+       PktBuf *buf;
+
+       reset_stats(&st_db);
+       reset_stats(&old_db);
+
+       buf = pktbuf_dynamic(512);
+       if (!buf) {
+               admin_error(client, "no mem");
+               return true;
+       }
+
+       pktbuf_write_RowDescription(buf, "sqqqqqqqq", "database",
+                                   "total_requests", "total_received",
+                                   "total_sent", "total_query_time",
+                                   "avg_req", "avg_recv", "avg_sent",
+                                   "avg_query");
+       statlist_for_each(item, pool_list) {
+               pool = container_of(item, PgPool, head);
+
+               if (!cur_db)
+                       cur_db = pool->db;
+
+               /* database changed: emit the finished row, start a new sum */
+               if (pool->db != cur_db) {
+                       write_stats(buf, &st_db, &old_db, cur_db->name);
+
+                       cur_db = pool->db;
+                       reset_stats(&st_db);
+                       reset_stats(&old_db);
+               }
+
+               stat_add(&st_db, &pool->stats);
+               stat_add(&old_db, &pool->older_stats);
+       }
+
+       /* row for the last database, if there was any pool at all */
+       if (cur_db)
+               write_stats(buf, &st_db, &old_db, cur_db->name);
+
+       admin_flush(client, buf, "SHOW");
+
+       return true;
+}
+
+/*
+ * Periodic timer callback: rotate the per-pool stat snapshots, re-arm
+ * the timer and log the overall averages.
+ *
+ * s, flags, arg - libevent callback arguments, not used here.
+ *
+ * The averages are cast to unsigned long long for printing: "%llu"
+ * requires exactly that type, and a 64-bit integer typedef need not be
+ * unsigned long long on every platform.
+ */
+static void refresh_stats(int s, short flags, void *arg)
+{
+       List *item;
+       PgPool *pool;
+       struct timeval period = { cf_stats_period, 0 };
+       PgStats old_total, cur_total, avg;
+
+       reset_stats(&old_total);
+       reset_stats(&cur_total);
+
+       old_stamp = new_stamp;
+       new_stamp = get_cached_time();
+
+       statlist_for_each(item, &pool_list) {
+               pool = container_of(item, PgPool, head);
+
+               /* shift snapshots: newer -> older, live -> newer */
+               pool->older_stats = pool->newer_stats;
+               pool->newer_stats = pool->stats;
+
+               stat_add(&cur_total, &pool->stats);
+               stat_add(&old_total, &pool->older_stats);
+       }
+       evtimer_add(&ev_stats, &period);
+
+       calc_average(&avg, &cur_total, &old_total);
+       /* send totals to logfile */
+       log_info("Stats: %llu req/s, in %llu b/s, "
+                "out %llu b/s, query %llu us",
+                (unsigned long long)avg.request_count,
+                (unsigned long long)avg.client_bytes,
+                (unsigned long long)avg.server_bytes,
+                (unsigned long long)avg.query_time);
+}
+
+/*
+ * Initialize the sample timestamps and start the periodic stats timer,
+ * which fires refresh_stats() after cf_stats_period seconds.
+ */
+void stats_setup(void)
+{
+       struct timeval period = { cf_stats_period, 0 };
+       usec_t now = get_time_usec();
+
+       /* start with the previous stamp exactly one second in the past */
+       new_stamp = now;
+       old_stamp = new_stamp - USEC;
+
+       /* launch maintenance */
+       evtimer_set(&ev_stats, refresh_stats, NULL);
+       evtimer_add(&ev_stats, &period);
+}
+
diff --git a/src/stats.h b/src/stats.h
new file mode 100644 (file)
index 0000000..cfaeb0a
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+void stats_setup(void);
+
+bool admin_database_stats(PgSocket *client, StatList *pool_list);
+
diff --git a/src/system.h b/src/system.h
new file mode 100644 (file)
index 0000000..8a813ba
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Required system headers
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "../config.h"
+#endif
+
+#define _GNU_SOURCE
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <limits.h>
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#ifdef HAVE_CRYPT_H
+#include <crypt.h>
+#endif
+
+#ifdef CASSERT /* assertions compiled in: a failed check is reported through fatal() */
+#define Assert(e) do { if (!(e)) fatal("Assert(%s) failed", #e); } while (0)
+#else          /* assertions compiled out: Assert(e) expands to nothing, so e is never evaluated */
+#define Assert(e)
+#endif
+
+#ifndef OPEN_MAX       /* fallback: ask the system at run time; OPEN_MAX is then a function call, not a compile-time constant */
+#define OPEN_MAX sysconf(_SC_OPEN_MAX)
+#endif
+
+/* how many microseconds in a second (long long constant, so arithmetic with it is done in long long or wider) */
+#define USEC (1000000LL)
+
+typedef enum { false=0, true=1 } bool;
+
+typedef uint8_t uint8;
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+
+
+#define INT8OID 20
+#define INT4OID 23
+#define TEXTOID 25
+
diff --git a/src/takeover.c b/src/takeover.c
new file mode 100644 (file)
index 0000000..94bb1b3
--- /dev/null
@@ -0,0 +1,284 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Connect to running bouncer process, load fds from it, shut it down
+ * and continue with them.
+ *
+ * Each row from SHOW FDS will have a corresponding fd in an ancillary message.
+ *
+ * Manpages: unix, sendmsg, recvmsg, cmsg, readv
+ */
+
+#include "bouncer.h"
+
+/*
+ * Takeover done, old process shut down,
+ * kick this one running.
+ *
+ * Drops the connection to the old bouncer, clears cf_reboot and calls
+ * resume_all().  Called from takeover_recv_cb() when safe_recvmsg()
+ * returns 0.
+ */
+static void takeover_finish(PgSocket *bouncer)
+{
+       disconnect_server(bouncer, false, "disko over");
+       cf_reboot = 0;
+       resume_all();
+}
+
+/*
+ * Handle one SHOW FDS row: take the passed descriptor out of the control
+ * message, parse the row text and hand the socket to the matching
+ * use_*_socket() function.
+ *
+ * pkt  - body of the DataRow packet
+ * cmsg - control message expected to carry the fd (SCM_RIGHTS)
+ *
+ * Every inconsistency ends in fatal(); the return value is always true.
+ */
+static bool takeover_load_fd(MBuf *pkt, const struct cmsghdr *cmsg)
+{
+       PgAddr addr;
+       uint64 ckey;
+       int fd, oldfd, linkfd, port;
+       char *task, *user, *db, *s_addr;
+
+       memset(&addr, 0, sizeof(addr));
+
+       /* the descriptor itself travels as ancillary data */
+       if (cmsg->cmsg_level != SOL_SOCKET
+               || cmsg->cmsg_type != SCM_RIGHTS
+               || cmsg->cmsg_len < CMSG_LEN(sizeof(int)))
+       {
+               fatal("broken fd packet");
+       } else {
+               memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
+               log_debug("got fd: %d", fd);
+       }
+
+       /* the row text describes what the descriptor was used for */
+       scan_text_result(pkt, "issssiqi", &oldfd, &task, &user, &db,
+                       &s_addr, &port, &ckey, &linkfd);
+       if (!task || !s_addr)
+               fatal("NULL data from old process");
+
+       log_debug("FD row: fd=%d(%d) linkfd=%d task=%s user=%s db=%s",
+                 oldfd, fd, linkfd, task,
+                 user ? user : "NULL",
+                 db ? db : "NULL");
+
+       /* unix sockets carry no address of their own, use the listen port */
+       if (strcmp(s_addr, "unix") == 0) {
+               addr.is_unix = true;
+               addr.port = cf_listen_port;
+       } else {
+               addr.is_unix = false;
+               addr.ip_addr.s_addr = inet_addr(s_addr);
+               addr.port = port;
+       }
+
+       /* register the socket according to its former role */
+       if (!strcmp(task, "client"))
+               use_client_socket(fd, &addr, db, user, ckey, oldfd, linkfd);
+       else if (!strcmp(task, "server"))
+               use_server_socket(fd, &addr, db, user, ckey, oldfd, linkfd);
+       else if (!strcmp(task, "pooler"))
+               use_pooler_socket(fd, addr.is_unix);
+       else
+               fatal("unknown task: %s", task);
+
+       return true;
+}
+
+/*
+ * Find the server in the pool's active list whose old fd equals the fd
+ * the client was linked to in the old process, and link the two
+ * sockets to each other.  Not finding a match is fatal.
+ */
+static void takeover_create_link(PgPool *pool, PgSocket *client)
+{
+       List *item;
+       PgSocket *server;
+
+       statlist_for_each(item, &pool->active_server_list) {
+               server = container_of(item, PgSocket, head);
+               if (server->tmp_sk_oldfd != client->tmp_sk_linkfd)
+                       continue;
+
+               /* found the pair */
+               server->link = client;
+               client->link = server;
+               return;
+       }
+       fatal("takeover_create_link: failed to find pair");
+}
+
+/*
+ * The old fds were parked in tmp_sk_oldfd / tmp_sk_linkfd during
+ * takeover.  Overwrite both with the current cached time on every
+ * suspended socket of the list.
+ *
+ * NOTE(review): presumably these two names alias time fields of
+ * PgSocket -- confirm in bouncer.h.
+ */
+static void takeover_clean_socket_list(StatList *list)
+{
+       List *item;
+       PgSocket *sk;
+
+       statlist_for_each(item, list) {
+               sk = container_of(item, PgSocket, head);
+               if (!sk->suspended)
+                       continue;
+
+               sk->tmp_sk_oldfd = get_cached_time();
+               sk->tmp_sk_linkfd = get_cached_time();
+       }
+}
+
+/*
+ * All fds are loaded.  First pass: re-create the client/server links in
+ * every non-admin pool.  Second pass: clean the temporary fd fields in
+ * the client and server lists of every pool.
+ */
+static void takeover_postprocess_fds(void)
+{
+       List *item, *item2;
+       PgSocket *client;
+       PgPool *pool;
+
+       /* pass 1: link suspended clients that had a server attached */
+       statlist_for_each(item, &pool_list) {
+               pool = container_of(item, PgPool, head);
+               if (!pool->admin) {
+                       statlist_for_each(item2, &pool->active_client_list) {
+                               client = container_of(item2, PgSocket, head);
+                               if (!client->suspended || !client->tmp_sk_linkfd)
+                                       continue;
+                               takeover_create_link(pool, client);
+                       }
+               }
+       }
+
+       /* pass 2: the temporary fields are not needed anymore */
+       statlist_for_each(item, &pool_list) {
+               pool = container_of(item, PgPool, head);
+               takeover_clean_socket_list(&pool->active_client_list);
+               takeover_clean_socket_list(&pool->active_server_list);
+               takeover_clean_socket_list(&pool->idle_server_list);
+       }
+}
+
+/*
+ * The old bouncer completed a command; pkt holds the CommandComplete
+ * tag.  Send the next step of the takeover dialogue:
+ *   SUSPEND done   -> SHOW FDS
+ *   SHOW ... done  -> review loaded fds, then SHUTDOWN
+ * Any other tag, or a failed send, is fatal.
+ */
+static void next_command(PgSocket *bouncer, MBuf *pkt)
+{
+       const char *cmd = mbuf_get_string(pkt);
+       bool sent = true;
+
+       log_debug("takeover_recv_fds: 'C' body: %s", cmd);
+
+       if (!strcmp(cmd, "SUSPEND")) {
+               log_info("SUSPEND finished, sending SHOW FDS");
+               SEND_generic(sent, bouncer, 'Q', "s", "SHOW FDS;");
+       } else if (!strncmp(cmd, "SHOW", 4)) {
+               log_info("SHOW FDS finished, sending SHUTDOWN");
+
+               /* all fds loaded, review them */
+               takeover_postprocess_fds();
+
+               /* all OK, kill old one */
+               SEND_generic(sent, bouncer, 'Q', "s", "SHUTDOWN;");
+       } else {
+               fatal("got bad CMD from old bouncer: %s", cmd);
+       }
+
+       if (!sent)
+               fatal("command send failed");
+}
+
+/*
+ * Walk all packets in data.  Each DataRow consumes the next control
+ * message of msg (which carries its fd); CommandComplete triggers the
+ * next takeover step.  Unexpected input is fatal.
+ *
+ * NOTE(review): the packet body length is taken from the header without
+ * a visible check against the bytes available in data -- confirm that
+ * get_header() / mbuf_get_bytes() guard against short packets.
+ */
+static void takeover_parse_data(PgSocket *bouncer,
+                               struct msghdr *msg, MBuf *data)
+{
+       struct cmsghdr *cmsg = NULL;
+       unsigned pkt_type, pkt_len;
+       uint8 *body;
+       MBuf pkt;
+
+       if (msg->msg_controllen)
+               cmsg = CMSG_FIRSTHDR(msg);
+
+       while (mbuf_avail(data) > 0) {
+               if (!get_header(data, &pkt_type, &pkt_len))
+                       fatal("cannot parse packet");
+
+               /* pkt covers the body only, the 5 header bytes are left out */
+               body = (uint8*)mbuf_get_bytes(data, pkt_len - 5);
+               mbuf_init(&pkt, body, pkt_len - 5);
+
+               switch (pkt_type) {
+               case 'D': /* DataRow */
+                       log_debug("takeover_parse_data: 'D'");
+                       if (!cmsg) {
+                               fatal("got row without fd info");
+                       } else {
+                               takeover_load_fd(&pkt, cmsg);
+                               cmsg = CMSG_NXTHDR(msg, cmsg);
+                       }
+                       break;
+               case 'C': /* CommandComplete */
+                       log_debug("takeover_parse_data: 'C'");
+                       next_command(bouncer, &pkt);
+                       break;
+               case 'T': /* RowDescription */
+                       log_debug("takeover_parse_data: 'T'");
+                       break;
+               case 'Z': /* ReadyForQuery */
+                       log_debug("takeover_parse_data: 'Z'");
+                       break;
+               case 'E': /* ErrorMessage */
+                       log_server_error("old bouncer sent", &pkt);
+                       fatal("something failed");
+                       /* no break here, same as before */
+               default:
+                       fatal("takeover_parse_data: unexpected pkt: '%c'", pkt_type);
+               }
+       }
+}
+
+/*
+ * Event callback: data from the old bouncer is readable.
+ *
+ * Always reads with recvmsg (via safe_recvmsg) so that ancillary data is
+ * picked up, which keeps the code simpler.  Received bytes are parsed as
+ * packets; end of stream finishes the takeover; EAGAIN is ignored and
+ * any other error is fatal.
+ *
+ * sock  - socket to read from
+ * flags - event flags, unused
+ * arg   - PgSocket of the connection to the old bouncer
+ */
+static void takeover_recv_cb(int sock, short flags, void *arg)
+{
+       PgSocket *bouncer = arg;
+       uint8 data_buf[2048];
+       uint8 cnt_buf[128];
+       struct msghdr msg;
+       struct iovec io;
+       MBuf data;
+       int got;
+
+       /* one data buffer plus room for control messages */
+       io.iov_base = data_buf;
+       io.iov_len = sizeof(data_buf);
+
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_iov = &io;
+       msg.msg_iovlen = 1;
+       msg.msg_control = cnt_buf;
+       msg.msg_controllen = sizeof(cnt_buf);
+
+       got = safe_recvmsg(sock, &msg, 0);
+       if (got == 0) {
+               /* old process closed the connection */
+               takeover_finish(bouncer);
+       } else if (got > 0) {
+               mbuf_init(&data, data_buf, got);
+               takeover_parse_data(bouncer, &msg, &data);
+       } else if (errno != EAGAIN) {
+               fatal_perror("safe_recvmsg");
+       }
+}
+
+/*
+ * login finished, send first command,
+ * replace recv callback with custom recvmsg() based one.
+ *
+ * A failed send of SUSPEND is fatal, the same way next_command() treats
+ * a failed send of the later commands: without it the takeover dialogue
+ * would never start and the process would wait forever.
+ */
+void takeover_login(PgSocket *bouncer)
+{
+       bool res;
+
+       slog_info(bouncer, "Login OK, sending SUSPEND");
+       SEND_generic(res, bouncer, 'Q', "s", "SUSPEND;");
+       if (!res)
+               fatal("command send failed");
+
+       /* use own callback */
+       sbuf_pause(&bouncer->sbuf);
+       sbuf_continue_with_callback(&bouncer->sbuf, takeover_recv_cb);
+}
+
+/*
+ * launch connection to running process
+ *
+ * Looks up the "pgbouncer" database and its pool for the forced user,
+ * then starts a new connection in that pool.  A missing database or
+ * pool is fatal.
+ *
+ * NOTE(review): find_database() presumably returns NULL when the name
+ * is unknown; db is checked before db->forced_user is read.
+ */
+void takeover_init(void)
+{
+       PgDatabase *db = find_database("pgbouncer");
+       PgPool *pool;
+
+       if (!db)
+               fatal("no admin database?");
+
+       pool = get_pool(db, db->forced_user);
+       if (!pool)
+               fatal("no admin pool?");
+
+       log_info("takeover_init: launching connection");
+       launch_new_connection(pool);
+}
+
diff --git a/src/takeover.h b/src/takeover.h
new file mode 100644 (file)
index 0000000..08ca095
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+void takeover_init(void);
+void takeover_login(PgSocket *bouncer);
+
diff --git a/src/util.c b/src/util.c
new file mode 100644 (file)
index 0000000..920f0bc
--- /dev/null
@@ -0,0 +1,575 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Random small utility functions
+ */
+
+#include "bouncer.h"
+
+#include "md5.h"
+
+#ifdef HAVE_SYS_UCRED_H
+#include <sys/ucred.h>
+#endif
+
+/* Allocate len bytes and zero-fill them; returns NULL if malloc() fails. */
+void *zmalloc(size_t len)
+{
+       void *mem;
+
+       mem = malloc(len);
+       if (mem == NULL)
+               return NULL;
+
+       /* memset() hands back its first argument */
+       return memset(mem, 0, len);
+}
+
+/*
+ * Safe string copy
+ */
+#ifndef HAVE_STRLCPY
+/*
+ * Copy src into dst, which holds n bytes.  The result is always
+ * NUL-terminated when n > 0.  Returns strlen(src), so a return
+ * value >= n tells the caller that truncation happened.
+ */
+size_t strlcpy(char *dst, const char *src, size_t n)
+{
+       size_t srclen = strlen(src);
+       size_t ncopy;
+
+       /* zero-sized destination: nothing may be written */
+       if (n == 0)
+               return srclen;
+
+       ncopy = (srclen < n) ? srclen : n - 1;
+       memcpy(dst, src, ncopy);
+       dst[ncopy] = 0;
+       return srclen;
+}
+#endif
+#ifndef HAVE_STRLCAT
+/*
+ * Append src to the string in dst, which holds n bytes in total.
+ * Returns the length the combined string would have had without
+ * truncation.
+ */
+size_t strlcat(char *dst, const char *src, size_t n)
+{
+       size_t used;
+
+       /* find end of existing string, but never look past n bytes */
+       for (used = 0; used < n; used++) {
+               if (dst[used] == 0)
+                       break;
+       }
+
+       /* no terminator inside the buffer: nothing can be appended */
+       if (used >= n)
+               return used + strlen(src);
+
+       return used + strlcpy(dst + used, src, n - used);
+}
+#endif
+
+/*
+ * Generic logging
+ */
+
+/* Write the current local time into buf as "YYYY-MM-DD HH:MM:SS". */
+static void render_time(char *buf, int max)
+{
+       struct timeval now;
+       struct tm parts;
+
+       gettimeofday(&now, NULL);
+       localtime_r(&now.tv_sec, &parts);
+       strftime(buf, max, "%Y-%m-%d %H:%M:%S", &parts);
+}
+
+/*
+ * Format one log line ("<time> <pid> <pfx> <msg>\n") and write it to
+ * cf_logfile (if set) and to stderr (unless running as daemon).
+ *
+ * errno is preserved across the call, because callers log from
+ * error paths and inspect errno afterwards.
+ */
+static void _log_write(const char *pfx, const char *msg)
+{
+       char buf[1024];
+       char tbuf[64];
+       int len;
+       int saved_errno = errno;
+
+       render_time(tbuf, sizeof(tbuf));
+       len = snprintf(buf, sizeof(buf), "%s %u %s %s\n",
+                       tbuf, (unsigned)getpid(), pfx, msg);
+       if (len < 0) {
+               /* formatting failed, emit nothing */
+               buf[0] = 0;
+               len = 0;
+       } else if (len >= (int)sizeof(buf)) {
+               /* snprintf() reports the untruncated length; clamp it to
+                * what is really in buf and keep the line terminated */
+               len = sizeof(buf) - 1;
+               buf[len - 1] = '\n';
+       }
+       if (cf_logfile) {
+               int fd = open(cf_logfile, O_CREAT | O_APPEND | O_WRONLY, 0644);
+               /* fd 0 is a valid descriptor too */
+               if (fd >= 0) {
+                       safe_write(fd, buf, len);
+                       safe_close(fd);
+               }
+       }
+       if (!cf_daemon)
+               fprintf(stderr, "%s", buf);
+
+       errno = saved_errno;
+}
+
+/* Render fmt/ap into a fixed 1024-byte buffer and hand it to _log_write(). */
+static void _log(const char *pfx, const char *fmt, va_list ap)
+{
+       char text[1024];
+
+       vsnprintf(text, sizeof(text), fmt, ap);
+       _log_write(pfx, text);
+}
+
+/*
+ * Log a FATAL message tagged with its source location, then terminate.
+ *
+ * Calls abort() when cf_verbose > 2, otherwise exit(1).
+ * Use through the fatal() macro, which fills in file/line/func.
+ */
+void _fatal(const char *file, int line, const char *func,
+           const char *fmt, ...)
+{
+       va_list ap;
+       char msg[1024];
+       char buf[1024];
+
+       /*
+        * Render the caller's message first, so that file and func are
+        * never part of a format string: a '%' in either would
+        * otherwise be interpreted as a conversion.
+        */
+       va_start(ap, fmt);
+       vsnprintf(msg, sizeof(msg), fmt, ap);
+       va_end(ap);
+
+       snprintf(buf, sizeof(buf),
+                "@%s:%d in function %s(): %s",
+                file, line, func, msg);
+       _log_write("FATAL", buf);
+
+       if (cf_verbose > 2)
+               abort();
+       exit(1);
+}
+
+/*
+ * Like _fatal(), but appends ": <strerror(errno)>" to the message.
+ * Use through the fatal_perror() macro.
+ */
+void _fatal_perror(const char *file, int line, const char *func,
+                  const char *fmt, ...)
+{
+       va_list ap;
+       char buf[1024];
+       /* capture errno before any library call gets a chance to change it */
+       int saved_errno = errno;
+
+       va_start(ap, fmt);
+       vsnprintf(buf, sizeof(buf), fmt, ap);
+       va_end(ap);
+       _fatal(file, line, func, "%s: %s", buf, strerror(saved_errno));
+}
+
+/*
+ * Generic logging: printf-style message written with the given
+ * level prefix.  Normally called through the log_*() macros.
+ */
+void log_level(const char *pfx, const char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       _log(pfx, fmt, args);
+       va_end(args);
+}
+
+/*
+ * Logging about specific PgSocket
+ */
+
+/*
+ * Log a printf-style message about one socket.  The message is
+ * prefixed with "S:" or "C:" (server or client socket) and
+ * "db/user@host:port".
+ */
+void
+slog_level(const char *pfx, const PgSocket *sock, const char *fmt, ...)
+{
+       char text[1024];
+       char line[1024];
+       const char *dbname = "(nodb)";
+       const char *username = "(nouser)";
+       const char *peer = "unix";
+       int port;
+       char kind;
+       va_list ap;
+
+       /* placeholders above are kept when the socket has no pool/user yet */
+       if (sock->pool)
+               dbname = sock->pool->db->name;
+       if (sock->auth_user)
+               username = sock->auth_user->name;
+       if (!sock->addr.is_unix)
+               peer = inet_ntoa(sock->addr.ip_addr);
+       port = sock->addr.port;
+
+       va_start(ap, fmt);
+       vsnprintf(text, sizeof(text), fmt, ap);
+       va_end(ap);
+
+       kind = is_server_socket(sock) ? 'S' : 'C';
+       snprintf(line, sizeof(line), "%c: %s/%s@%s:%d %s",
+                       kind, dbname, username, peer, port, text);
+
+       _log_write(pfx, line);
+}
+
+
+/*
+ * Wrappers for read/write/recv/send that survive interruptions.
+ */
+
+/* read() that is restarted when interrupted by a signal (EINTR). */
+int safe_read(int fd, void *buf, int len)
+{
+       int res;
+
+       do {
+               res = read(fd, buf, len);
+       } while (res < 0 && errno == EINTR);
+
+       return res;
+}
+
+/* write() that is restarted when interrupted by a signal (EINTR). */
+int safe_write(int fd, const void *buf, int len)
+{
+       int res;
+
+       do {
+               res = write(fd, buf, len);
+       } while (res < 0 && errno == EINTR);
+
+       return res;
+}
+
+/*
+ * recv() that is restarted on EINTR.  Failures are logged at NOISE
+ * level; successful calls are logged only when cf_verbose > 2.
+ */
+int safe_recv(int fd, void *buf, int len, int flags)
+{
+       int res;
+
+       do {
+               res = recv(fd, buf, len, flags);
+       } while (res < 0 && errno == EINTR);
+
+       if (res < 0)
+               log_noise("safe_recv(%d, %d) = %s", fd, len, strerror(errno));
+       else if (cf_verbose > 2)
+               log_noise("safe_recv(%d, %d) = %d", fd, len, res);
+       return res;
+}
+
+/*
+ * send() that is restarted on EINTR.  Failures are logged at NOISE
+ * level; successful calls are logged only when cf_verbose > 2.
+ */
+int safe_send(int fd, const void *buf, int len, int flags)
+{
+       int res;
+
+       do {
+               res = send(fd, buf, len, flags);
+       } while (res < 0 && errno == EINTR);
+
+       if (res < 0)
+               log_noise("safe_send(%d, %d) = %s", fd, len, strerror(errno));
+       else if (cf_verbose > 2)
+               log_noise("safe_send(%d, %d) = %d", fd, len, res);
+       return res;
+}
+
+/*
+ * close() that is retried on EINTR.
+ *
+ * According to the manpage close() can be interrupted, although it
+ * seems that at least on Linux this cannot happen.
+ */
+int safe_close(int fd)
+{
+       int res;
+
+       do {
+               res = close(fd);
+       } while (res < 0 && errno == EINTR);
+
+       return res;
+}
+
+/*
+ * recvmsg() that is restarted on EINTR.  Failures are logged as
+ * warnings; successful calls are logged only when cf_verbose > 2.
+ */
+int safe_recvmsg(int fd, struct msghdr *msg, int flags)
+{
+       int res;
+
+       do {
+               res = recvmsg(fd, msg, flags);
+       } while (res < 0 && errno == EINTR);
+
+       if (res < 0)
+               log_warning("safe_recvmsg(%d, msg, %d) = %s", fd, flags, strerror(errno));
+       else if (cf_verbose > 2)
+               log_noise("safe_recvmsg(%d, msg, %d) = %d", fd, flags, res);
+       return res;
+}
+
+/*
+ * sendmsg() that is restarted on EINTR.
+ *
+ * On EMSGSIZE the call is retried up to 20 times with a one second
+ * pause between attempts (see comment below).  Failures are logged
+ * as warnings.  Returns the sendmsg() result; on failure errno holds
+ * the error of the last sendmsg() attempt.
+ */
+int safe_sendmsg(int fd, const struct msghdr *msg, int flags)
+{
+       int res;
+       int err;
+       int msgerr_count = 0;
+loop:
+       res = sendmsg(fd, msg, flags);
+       if (res < 0 && errno == EINTR)
+               goto loop;
+
+       if (res < 0) {
+               /* remember the error, logging may overwrite errno */
+               err = errno;
+
+               /* iov_len / msg_controllen are not int, cast them for %d */
+               log_warning("safe_sendmsg(%d, msg[%d,%d], %d) = %s", fd,
+                           msg->msg_iovlen > 0 ? (int)msg->msg_iov[0].iov_len : 0,
+                           (int)msg->msg_controllen,
+                           flags, strerror(err));
+
+               /* with ancillary data on blocking socket OSX returns
+                * EMSGSIZE instead of blocking.  try to solve it by waiting */
+               if (err == EMSGSIZE && msgerr_count < 20) {
+                       struct timeval tv = {1, 0};
+                       log_warning("trying to sleep a bit");
+                       select(0, NULL, NULL, NULL, &tv);
+                       msgerr_count++;
+                       goto loop;
+               }
+               errno = err;
+       } else if (cf_verbose > 2)
+               log_noise("safe_sendmsg(%d, msg, %d) = %d", fd, flags, res);
+       return res;
+}
+
+/*
+ * Load a file into malloc()-ed C string.
+ */
+
+/*
+ * Read the file fn into a freshly malloc()-ed, NUL-terminated buffer.
+ *
+ * The buffer is sized from stat() and filled with a single read, so
+ * the string ends where that read ended.  Returns NULL (after logging
+ * the reason) on any failure; the caller must free() the result.
+ */
+char *load_file(const char *fn)
+{
+       struct stat st;
+       char *buf = NULL;
+       int res, fd = -1;
+
+       res = stat(fn, &st);
+       if (res < 0) {
+               log_error("%s: %s", fn, strerror(errno));
+               goto load_error;
+       }
+
+       buf = malloc(st.st_size + 1);
+       if (!buf) {
+               log_error("%s: out of memory", fn);
+               goto load_error;
+       }
+
+       if ((fd = open(fn, O_RDONLY)) < 0) {
+               log_error("%s: %s", fn, strerror(errno));
+               goto load_error;
+       }
+
+       if ((res = safe_read(fd, buf, st.st_size)) < 0) {
+               log_error("%s: %s", fn, strerror(errno));
+               goto load_error;
+       }
+
+       safe_close(fd);
+
+       /* terminate at what was actually read, it may be less than st_size */
+       buf[res] = 0;
+
+       return buf;
+
+load_error:
+       /* do not leak the descriptor when the read failed */
+       if (fd >= 0)
+               safe_close(fd);
+       if (buf != NULL)
+               free(buf);
+       return NULL;
+}
+
+/*
+ * PostgreSQL MD5 "encryption".
+ */
+
+/*
+ * Write MD5_DIGEST_LENGTH bytes of hash as lowercase hex into dst.
+ * dst receives 2 * MD5_DIGEST_LENGTH characters plus a terminating NUL.
+ */
+static void hash2hex(const uint8 *hash, char *dst)
+{
+       static const char hextbl[] = "0123456789abcdef";
+       const uint8 *end = hash + MD5_DIGEST_LENGTH;
+
+       while (hash < end) {
+               *dst++ = hextbl[*hash >> 4];
+               *dst++ = hextbl[*hash & 15];
+               hash++;
+       }
+       *dst = 0;
+}
+
+/*
+ * Compute "md5" + hex(md5(part1 || part2)) into dest.
+ *
+ * part1 is a C string, part2 is part2len raw bytes.  dest receives
+ * the 3-byte "md5" prefix followed by the output of hash2hex(),
+ * including its terminating NUL.  Always returns true.
+ */
+bool pg_md5_encrypt(const char *part1,
+                   const char *part2, size_t part2len,
+                   char *dest)
+{
+       MD5_CTX ctx;
+       uint8 hash[MD5_DIGEST_LENGTH];
+
+       MD5_Init(&ctx);
+       MD5_Update(&ctx, part1, strlen(part1));
+       MD5_Update(&ctx, part2, part2len);
+       MD5_Final(hash, &ctx);
+
+       memcpy(dest, "md5", 3);
+       hash2hex(hash, dest + 3);
+
+       /* wipe the whole digest; sizeof(*hash) would clear a single byte */
+       memset(hash, 0, sizeof(hash));
+       return true;
+}
+
+/*
+ * Wrapper for a usable crypt(): passes passwd and salt straight to
+ * crypt() and returns its result unchanged.
+ */
+const char *pg_crypt(const char *passwd, const char *salt)
+{
+       return crypt(passwd, salt);
+}
+
+/*
+ * Wrapper for getting random bytes: fills dest with len bytes taken
+ * from random().  Always returns true.
+ *
+ * NOTE(review): random() is not a cryptographic generator - confirm
+ * this is acceptable for the places the bytes are used.
+ */
+bool get_random_bytes(uint8 *dest, int len)
+{
+       int i;
+       for (i = 0; i < len; i++)
+               dest[i] = random() & 255;
+       /* return a real boolean instead of pushing len through bool */
+       return true;
+}
+
+/*
+ * high-precision time
+ */
+
+/* Current wall-clock time from gettimeofday(), in microseconds. */
+usec_t get_time_usec(void)
+{
+       struct timeval now;
+       usec_t secs;
+
+       gettimeofday(&now, NULL);
+       secs = (usec_t)now.tv_sec * USEC;
+       return secs + now.tv_usec;
+}
+
+/*
+ * cache time, as we don't need sub-second precision
+ */
+static usec_t time_cache = 0;
+
+/*
+ * Return the cached timestamp, reading the clock only when the
+ * cache is empty (zero).
+ */
+usec_t get_cached_time(void)
+{
+       if (time_cache != 0)
+               return time_cache;
+
+       time_cache = get_time_usec();
+       return time_cache;
+}
+
+/* Clear the cached timestamp, so the next get_cached_time() reads the clock again. */
+void reset_time_cache(void)
+{
+       time_cache = 0;
+}
+
+/*
+ * Get other side's uid.
+ *
+ * Uses SO_PEERCRED when defined, otherwise LOCAL_PEERCRED when that
+ * is defined.  On success stores the uid in *uid_p and returns true.
+ * Returns false if getsockopt() fails (the error is logged) or if
+ * neither option is available at compile time.
+ */
+bool get_unix_peer_uid(int fd, uid_t *uid_p)
+{
+       /* stays -1 when no peer-credential option is compiled in */
+       int res = -1;
+#ifdef SO_PEERCRED
+       struct ucred cred;
+       socklen_t len = sizeof(cred);
+       res = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len);
+       if (res >= 0)
+               *uid_p = cred.uid;
+       else
+               log_error("getsockopt(SO_PEERCRED): %s", strerror(errno));
+#else /* !SO_PEERCRED */
+#ifdef LOCAL_PEERCRED
+       struct xucred cred;
+       socklen_t len = sizeof(cred);
+       /* NOTE(review): AF_UNIX is passed as the option level here; BSD
+        * documentation describes LOCAL_PEERCRED at level 0 - confirm */
+       res = getsockopt(fd, AF_UNIX, LOCAL_PEERCRED, &cred, &len);
+       if (res >= 0)
+               *uid_p = cred.cr_uid;
+       else
+               log_error("getsockopt(LOCAL_PEERCRED): %s", strerror(errno));
+#endif /* !LOCAL_PEERCRED */
+#endif /* !SO_PEERCRED */
+       return (res >= 0);
+}
+
+/*
+ * Turn O_NONBLOCK on (val != 0) or off (val == 0) for fd, leaving
+ * the other file status flags as they are.  Any fcntl() failure is
+ * reported through fatal_perror().
+ */
+void socket_set_nonblocking(int fd, int val)
+{
+       int cur, wanted;
+
+       /* fetch current status flags */
+       cur = fcntl(fd, F_GETFL, 0);
+       if (cur < 0)
+               fatal_perror("fcntl(F_GETFL)");
+
+       /* compute flags with O_NONBLOCK in the requested state */
+       wanted = val ? (cur | O_NONBLOCK) : (cur & ~O_NONBLOCK);
+
+       /* install them */
+       if (fcntl(fd, F_SETFL, wanted) < 0)
+               fatal_perror("fcntl(F_SETFL)");
+}
+
+/*
+ * Set needed socket options.
+ *
+ * For every socket: close-on-exec, non-blocking mode and (where
+ * available) SO_NOSIGPIPE.  For non-unix sockets additionally:
+ * keepalive settings, kernel buffer sizes and TCP_NODELAY, driven by
+ * the cf_tcp_* settings.  Any failure is fatal.
+ */
+void tune_socket(int sock, bool is_unix)
+{
+       int res;
+       int val;
+
+       /* close fd on exec */
+       res = fcntl(sock, F_SETFD, FD_CLOEXEC);
+       if (res < 0)
+               fatal_perror("fcntl FD_CLOEXEC");
+
+       /* when no data avail, return EAGAIN instead blocking */
+       socket_set_nonblocking(sock, 1);
+
+#ifdef SO_NOSIGPIPE
+       /* disallow SIGPIPE, if possible */
+       val = 1;
+       res = setsockopt(sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof(val));
+       if (res < 0)
+               fatal_perror("setsockopt SO_NOSIGPIPE");
+#endif
+
+       /*
+        * Following options are for network sockets
+        */
+       if (is_unix)
+               return;
+
+       /* the keepalive stuff needs some poking before enabling */
+       if (cf_tcp_keepalive) {
+               /* turn on socket keepalive */
+               val = 1;
+               res = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val));
+               if (res < 0)
+                       fatal_perror("setsockopt SO_KEEPALIVE");
+#ifdef __linux__
+               /* set count of keepalive packets */
+               if (cf_tcp_keepcnt > 0) {
+                       val = cf_tcp_keepcnt;
+                       res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val));
+                       if (res < 0)
+                               fatal_perror("setsockopt TCP_KEEPCNT");
+               }
+               /* how long the connection can stay idle before sending keepalive pkts */
+               if (cf_tcp_keepidle) {
+                       val = cf_tcp_keepidle;
+                       res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val));
+                       if (res < 0)
+                               fatal_perror("setsockopt TCP_KEEPIDLE");
+               }
+               /* time between packets */
+               if (cf_tcp_keepintvl) {
+                       val = cf_tcp_keepintvl;
+                       res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val));
+                       if (res < 0)
+                               fatal_perror("setsockopt TCP_KEEPINTVL");
+               }
+#else
+#ifdef TCP_KEEPALIVE
+               /* non-linux: only the idle time is set, via TCP_KEEPALIVE */
+               if (cf_tcp_keepidle) {
+                       val = cf_tcp_keepidle;
+                       res = setsockopt(sock, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val));
+                       if (res < 0)
+                               fatal_perror("setsockopt TCP_KEEPALIVE");
+               }
+#endif
+#endif
+       }
+
+       /* set in-kernel socket buffer size (same value for send and receive) */
+       if (cf_tcp_socket_buffer) {
+               val = cf_tcp_socket_buffer;
+               res = setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
+               if (res < 0)
+                       fatal_perror("setsockopt SO_SNDBUF");
+               val = cf_tcp_socket_buffer;
+               res = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
+               if (res < 0)
+                       fatal_perror("setsockopt SO_RCVBUF");
+       }
+
+       /*
+        * Turn off kernel buffering, each send() will be one packet.
+        */
+       val = 1;
+       res = setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
+       if (res < 0)
+               fatal_perror("setsockopt TCP_NODELAY");
+}
+
+
+/*
+ * Check whether str appears as a whole item in liststr, a list whose
+ * items are separated by commas and/or whitespace.
+ *
+ * Every occurrence of str is examined, so an earlier item that merely
+ * starts with or contains str (e.g. "foobar, foo") does not hide a
+ * later exact match.
+ */
+bool strlist_contains(const char *liststr, const char *str)
+{
+       size_t len = strlen(str);
+       const char *p;
+       int before, after;
+
+       for (p = strstr(liststr, str); p != NULL; p = strstr(p + 1, str)) {
+               /* char in front of the match; start of list counts as separator.
+                * cast keeps isspace() away from negative values */
+               before = (p > liststr) ? (unsigned char)p[-1] : ',';
+               /* char right after the match */
+               after = (unsigned char)p[len];
+
+               if ((before == ',' || isspace(before))
+                   && (after == 0 || after == ',' || isspace(after)))
+                       return true;
+
+               /* only reachable with empty str: do not step past the NUL */
+               if (*p == 0)
+                       break;
+       }
+       return false;
+}
+
+/*
+ * Format a microsecond timestamp as local "YYYY-MM-DD HH:MM".
+ * The result lives in a static buffer that is overwritten by the
+ * next call.
+ */
+const char *format_date(usec_t uval)
+{
+       static char buf[128];
+       time_t secs;
+       struct tm *parts;
+
+       secs = uval / USEC;
+       parts = localtime(&secs);
+       strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M", parts);
+       return buf;
+}
+
diff --git a/src/util.h b/src/util.h
new file mode 100644 (file)
index 0000000..359fa19
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * PgBouncer - Lightweight connection pooler for PostgreSQL.
+ * 
+ * Copyright (c) 2007 Marko Kreen, Skype Technologies OÜ
+ * 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * time tools
+ */
+typedef uint64_t usec_t;
+usec_t get_cached_time(void);
+void reset_time_cache(void);
+usec_t get_time_usec(void);
+
+/*
+ * load file into malloced buffer
+ */
+char *load_file(const char *fn);
+
+void *zmalloc(size_t len);
+
+/*
+ * generic logging
+ */
+void log_level(const char *level, const char *s, ...);
+#define log_error(args...) log_level("ERROR", ## args)
+#define log_warning(args...) log_level("WARNING", ## args)
+#define log_info(args...) log_level("LOG", ## args)
+#define log_debug(args...) do { \
+               if (cf_verbose > 0) log_level("DEBUG", ## args); \
+       } while (0)
+#define log_noise(args...) do { \
+               if (cf_verbose > 1) log_level("NOISE", ## args); \
+       } while (0)
+
+
+/*
+ * logging about specific socket
+ */
+void slog_level(const char *level, const PgSocket *sock, const char *fmt, ...);
+#define slog_error(sk, args...) slog_level("ERROR", sk, ## args)
+#define slog_warning(sk, args...) slog_level("WARNING", sk, ## args)
+#define slog_info(sk, args...) slog_level("LOG", sk, ## args)
+#define slog_debug(sk, args...) do { \
+               if (cf_verbose > 0) slog_level("DEBUG", sk, ## args); \
+       } while (0)
+#define slog_noise(sk, args...) do { \
+               if (cf_verbose > 1) slog_level("NOISE", sk, ## args); \
+       } while (0)
+
+/*
+ * log and exit
+ */
+void _fatal(const char *file, int line, const char *func, const char *s, ...);
+void _fatal_perror(const char *file, int line, const char *func, const char *s, ...);
+#define fatal(args...) \
+       _fatal(__FILE__, __LINE__, __FUNCTION__, ## args)
+#define fatal_perror(args...) \
+       _fatal_perror(__FILE__, __LINE__, __FUNCTION__, ## args)
+
+/*
+ * non-interruptible operations
+ */
+int safe_read(int fd, void *buf, int len);
+int safe_write(int fd, const void *buf, int len);
+int safe_recv(int fd, void *buf, int len, int flags);
+int safe_send(int fd, const void *buf, int len, int flags);
+int safe_close(int fd);
+int safe_recvmsg(int fd, struct msghdr *msg, int flags);
+int safe_sendmsg(int fd, const struct msghdr *msg, int flags);
+
+/*
+ * password tools
+ */
+/* length of a stored md5 password: "md5" + 32 hex digits */
+#define MD5_PASSWD_LEN  35
+/* strncmp() stops at the terminator, so short strings are not over-read */
+#define isMD5(passwd) (strncmp(passwd, "md5", 3) == 0 \
+               && strlen(passwd) == MD5_PASSWD_LEN)
+bool pg_md5_encrypt(const char *part1, const char *part2, size_t p2len, char *dest);
+const char *pg_crypt(const char *passwd, const char *salt);
+bool get_random_bytes(uint8 *dest, int len);
+
+/*
+ * safe string copy
+ */
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t n);
+#endif
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t n);
+#endif
+
+/*
+ * socket option handling
+ */
+bool get_unix_peer_uid(int fd, uid_t *uid_p);
+void socket_set_nonblocking(int fd, int val);
+void tune_socket(int sock, bool is_unix);
+
+bool strlist_contains(const char *liststr, const char *str);
+
+const char *format_date(usec_t uval);
+
diff --git a/test/Makefile b/test/Makefile
new file mode 100644 (file)
index 0000000..f4e6294
--- /dev/null
@@ -0,0 +1,14 @@
+
+# Builds asynctest against libpq and a libevent tree in $(HOME)/src/libevent.
+
+# ':=' so that pg_config is run once, not on every expansion
+PGINC := -I$(shell pg_config --includedir)
+PGLIB := -L$(shell pg_config --libdir)
+
+CFLAGS = -O2 -g -Wall $(PGINC) -I$(HOME)/src/libevent -I../src
+LDFLAGS = $(PGLIB) -L$(HOME)/src/libevent/.libs
+# libraries go to LDLIBS: the built-in link rule places them after the source
+LDLIBS = -lpq -levent
+
+.PHONY: all clean
+
+all: asynctest
+
+# built by make's implicit rule for linking one .c file
+asynctest: asynctest.c
+
+clean:
+       $(RM) asynctest
diff --git a/test/asynctest.c b/test/asynctest.c
new file mode 100644 (file)
index 0000000..5d7a074
--- /dev/null
@@ -0,0 +1,264 @@
+/*
+ * Things to test:
+ * - Conn per query
+ * - show tx
+ * - long tx
+ * - variable-size query
+ */
+
+#include <sys/time.h>
+#include <sys/select.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <libpq-fe.h>
+#include <event.h>
+
+#define Assert(e) do { if (!(e)) { \
+       printf("Assert(%s) failed: %s:%d in %s\n", \
+               #e, __FILE__, __LINE__, __FUNCTION__); \
+       exit(1); } } while (0)
+
+typedef enum { false=0, true=1 } bool;
+
+#include "list.h"
+
+/* State of one test connection; always linked into idle_list or active_list. */
+typedef struct DbConn {
+       /* list linkage */
+       List            head;
+       /* libpq connect string, passed to PQconnectStart() */
+       const char      *connstr;
+       /* libevent event used for the current wait on the socket */
+       struct event    ev;
+       //time_t                connect_time;
+       //unsigned      query_count;
+       /* libpq connection, NULL when not connected */
+       PGconn          *con;
+       /* SQL text sent by send_query() */
+       const char      *query;
+} DbConn;
+
+static LIST(idle_list);
+static LIST(active_list);
+
+/*
+ * Allocate a zeroed DbConn for the given connect string.
+ * The connect string is not copied.  Exits if memory runs out.
+ */
+static DbConn *new_db(const char *connstr)
+{
+       DbConn *db = calloc(1, sizeof(*db));
+
+       /* the result used to be passed to memset() unchecked */
+       if (db == NULL) {
+               printf("new_db: out of memory\n");
+               exit(1);
+       }
+       list_init(&db->head);
+       db->connstr = connstr;
+       return db;
+}
+
+/* Move db from active_list to idle_list; asserts that it is currently active. */
+static void set_idle(DbConn *db)
+{
+       Assert(item_in_list(&db->head, &active_list));
+       list_del(&db->head);
+       list_append(&db->head, &idle_list);
+}
+
+/* Move db from idle_list to active_list; asserts that it is currently idle. */
+static void set_active(DbConn *db)
+{
+       Assert(item_in_list(&db->head, &idle_list));
+       list_del(&db->head);
+       list_append(&db->head, &active_list);
+}
+
+/**
+ * Some error happened: report it, drop the libpq connection if there
+ * is one, and put db back on the idle list.
+ */
+static void conn_error(DbConn *db, const char *desc)
+{
+       if (db->con == NULL) {
+               printf("random error\n");
+       } else {
+               printf("libpq error in %s: %s\n",
+                      desc, PQerrorMessage(db->con));
+               PQfinish(db->con);
+               db->con = NULL;
+       }
+       set_idle(db);
+}
+
+/**
+ * Connection has a resultset available, fetch it.
+ *
+ * Returns true if there may be more results coming,
+ * false if all done (or the result was an error).
+ */
+static bool another_result(DbConn *db)
+{
+       PGresult *res = PQgetResult(db->con);
+       ExecStatusType status;
+
+       /* NULL means the query is complete: go idle and disconnect */
+       if (res == NULL) {
+               set_idle(db);
+               PQfinish(db->con);
+               db->con = NULL;
+               return false;
+       }
+
+       /* result is freed in all cases, only its status matters */
+       status = PQresultStatus(res);
+       PQclear(res);
+
+       // todo: check result
+       if (status == PGRES_TUPLES_OK || status == PGRES_COMMAND_OK)
+               return true;
+
+       conn_error(db, "weird result");
+       return false;
+}
+
+/**
+ * Called when the event loop reports the connection readable.
+ *
+ * Feeds input to libpq, drains every complete result, and re-arms
+ * the read event if a result is still incomplete.
+ */
+static void result_cb(int sock, short flags, void *arg)
+{
+       DbConn *db = arg;
+
+       if (PQconsumeInput(db->con) == 0) {
+               conn_error(db, "PQconsumeInput");
+               return;
+       }
+
+       /* fetch results for as long as libpq has a complete one */
+       while (!PQisBusy(db->con)) {
+               /* false: all done or failed, nothing left to wait for */
+               if (!another_result(db))
+                       return;
+       }
+
+       /* PQisBusy: incomplete result, wait for more data */
+       event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db);
+       event_add(&db->ev, NULL);
+}
+
+/**
+ * Flush pending output to the server.  Waits for writability again
+ * if data is still queued, otherwise starts waiting for the result.
+ */
+static void send_cb(int sock, short flags, void *arg)
+{
+       DbConn *db = arg;
+       int pending = PQflush(db->con);
+
+       if (pending < 0) {
+               conn_error(db, "PQflush");
+               return;
+       }
+
+       if (pending > 0)
+               event_set(&db->ev, PQsocket(db->con), EV_WRITE, send_cb, db);
+       else
+               event_set(&db->ev, PQsocket(db->con), EV_READ, result_cb, db);
+       event_add(&db->ev, NULL);
+}
+
+/** send the query to server connection */
+static void send_query(DbConn *db)
+{
+       int ok;
+
+       /* queue the query: no parameters, text result format */
+       ok = PQsendQueryParams(db->con, db->query, 0,
+                       NULL,   /* paramTypes */
+                       NULL,   /* paramValues */
+                       NULL,   /* paramLengths */
+                       NULL,   /* paramFormats */
+                       0);     /* resultformat, 0-text, 1-bin */
+       if (ok == 0) {
+               conn_error(db, "PQsendQueryParams");
+               return;
+       }
+
+       /* flush it down; send_cb does the flush and arms the next event */
+       send_cb(PQsocket(db->con), 0, db);
+}
+
+/**
+ * Drive the non-blocking connect: poll libpq and wait for whatever
+ * it asks for next, send the query once connected, or report failure.
+ */
+static void connect_cb(int sock, short flags, void *arg)
+{
+       DbConn *db = arg;
+       short wait_for = 0;
+
+       switch (PQconnectPoll(db->con)) {
+       case PGRES_POLLING_WRITING:
+               wait_for = EV_WRITE;
+               break;
+       case PGRES_POLLING_READING:
+               wait_for = EV_READ;
+               break;
+       case PGRES_POLLING_OK:
+               send_query(db);
+               return;
+       case PGRES_POLLING_ACTIVE:
+       case PGRES_POLLING_FAILED:
+               conn_error(db, "PQconnectPoll");
+               return;
+       }
+
+       /* still connecting: come back when the socket is ready */
+       if (wait_for) {
+               event_set(&db->ev, PQsocket(db->con), wait_for, connect_cb, db);
+               event_add(&db->ev, NULL);
+       }
+}
+
+/**
+ * Start a non-blocking connect for db and wait for the socket to
+ * become writable; connect_cb continues from there.
+ */
+static void launch_connect(DbConn *db)
+{
+       PGconn *conn = PQconnectStart(db->connstr);
+
+       db->con = conn;
+       if (!conn) {
+               conn_error(db, "PQconnectStart: no mem");
+               return;
+       }
+       if (PQstatus(conn) == CONNECTION_BAD) {
+               conn_error(db, "PQconnectStart");
+               return;
+       }
+
+       event_set(&db->ev, PQsocket(conn), EV_WRITE, connect_cb, db);
+       event_add(&db->ev, NULL);
+}
+
+/**
+ * Put an idle DbConn back to work: connect first if it has no
+ * connection, otherwise send its query right away.
+ */
+static void handle_idle(DbConn *db)
+{
+       set_active(db);
+
+       if (db->con == NULL) {
+               launch_connect(db);
+               return;
+       }
+       send_query(db);
+}
+
+/*
+ * Create 10 connections that all run "select 1", then loop forever:
+ * run one pass of the event loop and restart every connection that
+ * is on the idle list.
+ */
+int main(void)
+{
+       int i;
+       DbConn *db;
+       List *item, *tmp;
+
+       /* all connections start out idle and unconnected */
+       for (i = 0; i < 10; i++) {
+               db = new_db("dbname=marko port=6000 host=/tmp");
+               db->query = "select 1";
+               list_append(&db->head, &idle_list);
+       }
+
+       event_init();
+
+       while (1) {
+               event_loop(EVLOOP_ONCE);
+               /* handle_idle() unlinks the item from idle_list during the walk */
+               list_for_each_safe(item, &idle_list, tmp) {
+                       db = container_of(item, DbConn, head);
+                       handle_idle(db);
+               }
+       }
+       return 0;
+}
+
+
diff --git a/test/stress.py b/test/stress.py
new file mode 100755 (executable)
index 0000000..836cc79
--- /dev/null
@@ -0,0 +1,114 @@
+#! /usr/bin/env python
+
+import sys, os, re, time, psycopg
+import threading, thread, random
+
+# number of worker threads started by main()
+n_thread = 100
+# when non-zero, workers skip autocommit and commit after each query
+longtx = 0
+# upper bound (seconds) of the random extra sleep after each query;
+# the second assignment is the one that takes effect
+tx_sleep = 0
+tx_sleep = 8
+
+# connection parameters, joined into a connect string by get_connstr()
+conn_data = {
+    'dbname': 'marko',
+    #'host': '127.0.0.1',
+    'host': '/tmp',
+    'port': '6000',
+    'user': 'marko',
+    #'password': '',
+    'connect_timeout': '5',
+}
+
+def get_connstr():
+    """Build a 'key=value key=value ...' connect string from conn_data."""
+    pairs = [key + '=' + val for key, val in conn_data.items()]
+    return " ".join(pairs)
+
+class WorkThread(threading.Thread):
+    """Daemon thread that keeps connecting and running short queries.
+
+    Counts executed queries; the counter is protected by stat_lock and
+    is read-and-reset through fetch_cnt().
+    """
+    def __init__(self):
+        threading.Thread.__init__(self)
+        self.setDaemon(True)
+        self.stat_lock = threading.Lock()
+        self.query_cnt = 0
+
+    def inc_cnt(self):
+        """Increment the query counter under the lock."""
+        self.stat_lock.acquire()
+        self.query_cnt += 1
+        self.stat_lock.release()
+
+    def fetch_cnt(self):
+        """Return the query counter and reset it to zero, under the lock."""
+        self.stat_lock.acquire()
+        val = self.query_cnt
+        self.query_cnt = 0
+        self.stat_lock.release()
+        return val
+
+    def run(self):
+        """Thread body: call main_loop() over and over.
+
+        Starts after a random delay of up to 10 seconds.  Any exception
+        from main_loop() other than KeyboardInterrupt/SystemExit is
+        printed, followed by a 5 second pause.
+        """
+        try:
+            time.sleep(random.random() * 10.0)
+        except: pass
+        while 1:
+            try:
+                self.main_loop()
+            except KeyboardInterrupt:
+                break
+            except SystemExit:
+                break
+            except Exception, d:
+                print d
+                try:
+                    time.sleep(5)
+                except: pass
+
+    def main_loop(self):
+        """Open one connection and run do_work() on it 10 times."""
+        db = psycopg.connect(get_connstr())
+        if not longtx:
+            db.autocommit(1)
+        n = 0
+        while n < 10:
+            self.do_work(db)
+            self.inc_cnt()
+            n += 1
+
+    def do_work(self, db):
+        """Run one pg_sleep() query of up to 1 second, then sleep client-side."""
+        curs = db.cursor()
+        q = "select pg_sleep(%.02f)" % (random.random() * 1)
+        curs.execute(q)
+        # client-side pause of 1 .. tx_sleep+1 seconds before returning
+        time.sleep(tx_sleep * random.random() + 1)
+        if longtx:
+            db.commit()
+
+def main():
+    """Start n_thread workers, then print the query rate about every 5 seconds."""
+    print "connstr", get_connstr()
+
+    thread_list = []
+    while len(thread_list) < n_thread:
+        t = WorkThread()
+        t.start()
+        thread_list.append(t)
+
+    print "started %d threads" % len(thread_list)
+
+    last = time.time()
+    while 1:
+        time.sleep(1)
+        now = time.time()
+        dur = now - last
+        if dur >= 5:
+            last = now
+            # sum and reset the per-thread counters
+            cnt = 0
+            for t in thread_list:
+                cnt += t.fetch_cnt()
+            # queries per second over the elapsed interval
+            avg = cnt / dur
+            print "avg", avg
+
+if __name__ == '__main__':
+    try:
+        main()
+    except SystemExit:
+        pass
+    except KeyboardInterrupt:
+        pass
+    #except Exception, d:
+    #    print d
+
diff --git a/test/test.ini b/test/test.ini
new file mode 100644 (file)
index 0000000..fa85fa7
--- /dev/null
@@ -0,0 +1,136 @@
+;; database name = connect string
+[databases]
+
+p0 = port=6666 host=127.0.0.1 dbname=p0 user=bouncer pool_size=2
+p1 = port=6666 host=127.0.0.1 dbname=p1 user=bouncer
+p2 = port=6668 host=127.0.0.1 dbname=p2 user=bouncer
+
+;; Configuration section
+[pgbouncer]
+
+;;;
+;;; Administrative settings
+;;;
+
+logfile = test.log
+pidfile = test.pid
+
+;;;
+;;; Where to wait for clients
+;;;
+
+; ip address or * which means all ip-s
+listen_addr = 127.0.0.1
+listen_port = 6667
+unix_socket_dir = /tmp
+
+;;;
+;;; Authentication settings
+;;;
+
+; any, trust, plain, crypt, md5
+auth_type = trust
+#auth_file = 8.0/main/global/pg_auth
+auth_file = userlist.txt
+
+;;;
+;;; Pooler personality questions
+;;;
+
+; When server connection is released back to pool:
+;   session      - after client disconnects
+;   transaction  - after transaction finishes
+;   statement    - after statement finishes
+pool_mode = statement
+
+; When taking idle server into use, this query is run first.
+;
+; Query for session pooling:
+;   ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT
+; Query for statement/transaction pooling:
+;   SELECT 1
+; Empty query disables the functionality
+server_check_query = select 1
+
+; If server was used more recently than this many seconds ago,
+; skip the check query.  If 0, the check query is always run.
+server_check_delay = 10
+
+;;;
+;;; Connection limits
+;;;
+
+; total number of clients that can connect
+max_client_conn = 10
+default_pool_size = 5
+
+;;;
+;;; Timeouts
+;;;
+
+; Close server connection if it has been connected longer than this.
+server_lifetime = 120
+
+; Close server connection if it has not been used in this time.
+; Allows cleaning unnecessary connections from the pool after a peak.
+server_idle_timeout = 60
+
+; Cancel connection attempt if server does not answer within this time.
+server_connect_timeout = 15
+
+; If server login failed (server_connect_timeout or auth failure)
+; then wait this many seconds.
+server_login_retry = 15
+
+; Dangerous.  Server connection is closed if query does not return
+; in this time.  Should be used to survive network problems,
+; _not_ as statement_timeout. (default: 0)
+query_timeout = 20
+
+; Dangerous.  Client connection is closed if no activity in this time.
+; Should be used to survive network problems. (default: 0)
+client_idle_timeout = 0
+
+
+;;;
+;;; Low-level tuning options
+;;;
+
+; buffer for streaming packets
+pkt_buf = 2048
+
+;;;
+;;; networking options, for info: man 7 tcp
+;;;
+
+; linux: notify program about new connection only if there
+; is also data received.  (Seconds to wait.)
+tcp_defer_accept = 0
+
+;; following options are reloadable, but apply only to
+;; new connections.
+
+; in-kernel buffer size (linux default: 4096)
+tcp_socket_buffer = 0
+
+; whether tcp keepalive should be turned on (0/1)
+tcp_keepalive = 0
+
+;; following options are linux-specific.
+;; they also require tcp_keepalive=1
+
+; count of keepalive packets
+tcp_keepcnt = 0
+
+; how long the connection can be idle,
+; before sending keepalive packets
+tcp_keepidle = 0
+
+; The time between individual keepalive probes.
+tcp_keepintvl = 0
+
+; By default, max tcp packet cannot be larger than pkt_buf.  
+; If this is set, then bouncer tells the kernel to queue packets.
+; Then max pkt length is tcp_socket_buffer.
+tcp_buffer_more = 0
+
diff --git a/test/test.sh b/test/test.sh
new file mode 100755 (executable)
index 0000000..ed2a4c1
--- /dev/null
@@ -0,0 +1,404 @@
+#!/bin/sh
+
+# Notes:
+# - uses iptables and -F with some tests, probably not very friendly to your firewall
+# - uses nc (netcat) with some tests, skips if not in path
+# - assumes postgres 8.2 fix your path so that it comes first
+
+export PATH=/usr/lib/postgresql/8.2/bin:$PATH
+export PGDATA=$PWD/pgdata
+export PGHOST=localhost
+export PGPORT=6667
+export EF_ALLOW_MALLOC_0=1
+
+BOUNCER_LOG=test.log
+BOUNCER_INI=test.ini
+BOUNCER_PID=test.pid
+BOUNCER_PORT=`sed -n '/^listen_port/s/listen_port.*=[^0-9]*//p' $BOUNCER_INI`
+BOUNCER_EXE=./pgbouncer
+
+LOGDIR=log
+NC_PORT=6668
+PG_PORT=6666
+PG_LOG=$LOGDIR/pg.log
+
+pgctl() {
+       pg_ctl -o "-p $PG_PORT" -D $PGDATA $@ >>$PG_LOG 2>&1
+}
+
+mkdir -p $LOGDIR
+rm -f $BOUNCER_LOG $PG_LOG
+# rm -r $PGDATA
+
+if [ ! -d $PGDATA ]; then
+       mkdir $PGDATA
+       initdb >/dev/null 2>&1
+fi
+
+pgctl start
+sleep 5
+
+psql -p $PG_PORT -l |grep p0 > /dev/null || {
+       psql -p $PG_PORT -c "create user bouncer" template1
+       createdb -p $PG_PORT p0
+       createdb -p $PG_PORT p1
+}
+
+$BOUNCER_EXE -d $BOUNCER_INI
+sleep 1
+
+#
+#  fw hacks
+#
+
+fw_drop_port() {
+       case `uname` in
+       Linux)
+               sudo iptables -A OUTPUT -p tcp --dport $1 -j DROP;;
+       Darwin)
+               sudo ipfw add 100 drop tcp from any to 127.0.0.1 dst-port $1;;
+       *)
+               echo "Unknown OS";;
+       esac
+}
+fw_reject_port() {
+       case `uname` in
+       Linux)
+               sudo iptables -A OUTPUT -p tcp --dport $1 -j REJECT --reject-with tcp-reset;;
+       Darwin)
+               sudo ipfw add 100 reset tcp from any to 127.0.0.1 dst-port $1;;
+       *)
+               echo "Unknown OS";;
+       esac
+}
+
+fw_reset() {
+       case `uname` in
+       Linux)
+               sudo iptables -F;;
+       Darwin)
+               sudo ipfw del 100;;
+       *)
+               echo "Unknown OS"; exit 1;;
+       esac
+}
+
+#
+# util functions
+#
+
+complete() {
+       test -f $BOUNCER_PID && kill `cat $BOUNCER_PID` >/dev/null 2>&1
+       pgctl -m fast stop
+       rm -f $BOUNCER_PID
+}
+
+die() {
+       echo $@
+       complete
+       exit 1
+}
+
+admin() {
+       psql -h /tmp -U pgbouncer pgbouncer -c "$@;" || die "Cannot contact bouncer!"
+}
+
+runtest() {
+       echo -n "`date` running $1 ... "
+       eval $1 >$LOGDIR/$1.log 2>&1
+       if [ $? -eq 0 ]; then
+               echo "SUCCESS"
+       else
+               echo "FAILED"
+       fi
+       date >> $LOGDIR/$1.log
+
+       # allow background processing to complete
+       wait
+       # start with fresh config
+       kill -HUP `cat $BOUNCER_PID`
+}
+
+# server_lifetime
+test_server_lifetime() {
+       admin "set server_lifetime=2"
+       psql -c "select now()" p0
+       sleep 3
+
+       rc=`psql -p $PG_PORT -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='p0'" p0`
+       psql -c "select now()" p0
+       return $rc
+}
+
+# server_idle_timeout
+test_server_idle_timeout() {
+       admin "set server_idle_timeout=2"
+       psql -c "select now()" p0
+       sleep 3
+       rc=`psql -p $PG_PORT -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='p0'" p0`
+       psql -c "select now()" p0
+       return $rc
+}
+
+# query_timeout
+test_query_timeout() {
+       admin "set query_timeout=3"
+       psql -c "select pg_sleep(5)" p0 && return 1
+       return 0
+}
+
+# client_idle_timeout
+#
+# Sets client_idle_timeout to 2, then keeps one psql session idle for
+# 3 seconds between two queries.  ON_ERROR_STOP makes psql exit with an
+# error as soon as a statement fails, so the test succeeds only when
+# psql exits non-zero.
+# NOTE: '<<-' strips leading tab characters only, so the here-document
+# body and its terminator must be indented with tabs.
+test_client_idle_timeout() {
+       admin "set client_idle_timeout=2"
+       psql --set ON_ERROR_STOP=1 p0 <<-PSQL_EOF
+       select now();
+       \! sleep 3
+       select now();
+       PSQL_EOF
+       # invert the result: psql success means the test failed
+       test $? -eq 0 && return 1
+       return 0
+}
+
+# server_login_retry 
+test_server_login_retry() {
+       admin "set query_timeout=10"
+       admin "set server_login_retry=1"
+
+       (pgctl -m fast stop; sleep 3; pgctl start) &
+       sleep 1
+       psql -c "select now()" p0
+       rc=$?
+       wait
+       return $rc
+}
+
+# server_connect_timeout - uses netcat to start dummy server
+test_server_connect_timeout_establish() {
+       which nc >/dev/null || return 1
+
+       nc -l -p $NC_PORT >/dev/null &
+       admin "set query_timeout=3"
+       admin "set server_connect_timeout=2"
+       psql -c "select now()" p2
+       # client will always see query_timeout, need to grep for connect timeout
+       grep "closing because: connect timeout" $BOUNCER_LOG 
+        # didnt seem to die otherwise
+       killall nc
+       return $?
+}
+
+# server_connect_timeout - block with iptables
+# XXX: for some reason bouncer says 'connect failed' not 'connect timeout'
+test_server_connect_timeout_reject() {
+       test -z $CAN_SUDO && return 1
+       admin "set query_timeout=5"
+       admin "set server_connect_timeout=3"
+       fw_drop_port $PG_PORT
+       psql -c "select now()" p0
+       fw_reset
+       # client will always see query_timeout, need to grep for connect timeout
+       grep "closing because: connect failed" $BOUNCER_LOG
+}
+
+# server_check_delay
+test_server_check_delay() {
+       test -z $CAN_SUDO && return 1
+
+       admin "set server_check_delay=2"
+       admin "set server_login_retry=3"
+       admin "set query_timeout=10"
+
+       psql p0 -c "select now()"
+       fw_reject_port $PG_PORT
+       sleep 3
+       psql -tAq p0 -c "select 1" >$LOGDIR/test.tmp &
+       sleep 1
+       fw_reset
+       echo `date` rules flushed
+       wait
+       echo `date` done waiting
+
+       test "`cat $LOGDIR/test.tmp`" = "1"
+}
+
+# max_client_conn
+test_max_client_conn() {
+       admin "set max_client_conn=5"
+       admin "show config"
+
+       for i in `seq 1 4`; do
+               psql p1 -c "select now() as sleeping from pg_sleep(3);" &
+       done
+
+       # last conn allowed
+       psql p1 -c "select now() as last_conn" || return 1
+
+       # exhaust it
+       psql p1 -c "select now() as sleeping from pg_sleep(3);"  &
+       sleep 1
+
+       # shouldn't be allowed
+       psql p1 -c "select now() as exhausted"  && return 1
+
+       # should be ok
+       echo 'waiting for clients to complete ...'
+       wait
+       psql p1 -c "select now() as ok"  || return 1
+
+       return 0
+}
+
+# - max pool size
+test_pool_size() {
+       
+       docount() {
+               for i in `seq 10`; do
+                       psql $1 -c "select pg_sleep(0.5)"  &
+               done
+               wait
+               cnt=`psql -tAqc "select count(1) from pg_stat_activity where usename='bouncer' and datname='$1'" $1`
+               echo $cnt
+       }
+
+       test `docount p0` -ne 2 && return 1
+       test `docount p1` -ne 5 && return 1
+
+       return 0
+}
+
+# test online restart while clients running
+test_online_restart() {
+       for i in `seq 1 5`; do 
+               for j in `seq 1 10`; do 
+                       psql -c "select now() as sleeping from pg_sleep(0.2)" p0  &
+               done
+
+               pid1=`cat $BOUNCER_PID`
+               echo "old bouncer is $pid1"
+               $BOUNCER_EXE -d -R  $BOUNCER_INI
+               sleep 2
+               pid2=`cat $BOUNCER_PID`
+               echo "new bouncer is $pid2"
+               [ $pid1 = $pid2 ] && return 1
+       done
+       return 0
+}
+
+# test pause/resume
+test_pause_resume() {
+       rm -f $LOGDIR/test.tmp
+       for i in `seq 1 50`; do
+               psql -tAq p0 -c 'select 1 from pg_sleep(0.1)' >>$LOGDIR/test.tmp
+       done &
+
+       for i in `seq 1 5`; do
+               admin "pause"
+               sleep 1
+               admin "resume"
+               sleep 1
+       done
+
+       wait
+       test `wc -l <$LOGDIR/test.tmp` -eq 50
+}
+
+# test suspend/resume
+#
+# 50 sequential queries run in the background, each appending one line
+# to $LOGDIR/test.tmp.  Meanwhile 5 admin console sessions each issue
+# suspend, sleep 1 second, resume and sleep 1 second.  Succeeds when
+# all 50 lines arrived.
+# NOTE: '<<-' strips leading tab characters only, so the here-document
+# body and its terminator must be indented with tabs.
+test_suspend_resume() {
+       rm -f $LOGDIR/test.tmp
+       # background client
+       for i in `seq 1 50`; do
+               psql -tAq p0 -c 'select 1 from pg_sleep(0.1)' >>$LOGDIR/test.tmp
+       done &
+
+       # suspend and resume are sent within one psql session
+       for i in `seq 1 5`; do
+               psql -h /tmp -p $BOUNCER_PORT pgbouncer -U pgbouncer <<-PSQL_EOF
+               suspend;
+               \! sleep 1
+               resume;
+               \! sleep 1
+               PSQL_EOF
+       done
+
+       # let the background client finish, then count its results
+       wait
+       test `wc -l <$LOGDIR/test.tmp` -eq 50
+}
+
+# test pool database restart
+test_database_restart() {
+       admin "set server_login_retry=1"
+
+       psql p0 -c "select now() as p0_before_restart"
+       pgctl -m fast restart
+       echo `date` restart 1
+       psql p0 -c "select now() as p0_after_restart" || return 1
+
+
+       # do with some more clients
+       for i in `seq 1 5`; do
+               psql p0 -c "select pg_sleep($i)" &
+               psql p1 -c "select pg_sleep($i)" &
+       done
+
+       pgctl -m fast restart
+       echo `date` restart 2
+
+       wait
+       psql p0 -c "select now() as p0_after_restart" || return 1
+}
+
+# test connect string change
+test_database_change() {
+       admin "set server_lifetime=2"
+
+       db1=`psql -tAq p1 -c "select current_database()"`
+
+       cp test.ini test.ini.bak
+       sed 's/\(p1 = port=6666 host=127.0.0.1 dbname=\)\(p1\)/\1p0/g' test.ini >test2.ini
+       mv test2.ini test.ini
+
+       kill -HUP `cat $BOUNCER_PID`
+
+       sleep 3
+       db2=`psql -tAq p1 -c "select current_database()"`
+
+       echo "db1=$db1 db2=$db2"
+       cp test.ini.bak test.ini
+       rm test.ini.bak
+
+       admin "show databases"
+       admin "show pools"
+
+       test $db1 = "p1" -a $db2 = "p0"
+}
+
+echo "Testing for sudo access."
+sudo true && CAN_SUDO=1
+
+testlist="
+test_server_login_retry
+test_client_idle_timeout
+test_server_lifetime
+test_server_idle_timeout
+test_query_timeout
+test_server_connect_timeout_establish
+test_server_connect_timeout_reject
+test_server_check_delay
+test_max_client_conn
+test_pool_size
+test_online_restart
+test_pause_resume
+test_suspend_resume
+test_database_restart
+test_database_change
+"
+
+if [ $# -gt 0 ]; then
+       testlist=$@
+fi
+
+for test in $testlist
+do
+       runtest $test
+done
+
+complete
+
diff --git a/test/userlist.txt b/test/userlist.txt
new file mode 100644 (file)
index 0000000..166f46d
--- /dev/null
@@ -0,0 +1,3 @@
+"marko" "asdasd"
+"postgres" "asdasd"
+"pgbouncer" "fake"