From fd0366e1b57df0bb0ad30140acf2c4225cae017b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 18 Nov 1998 00:47:28 +0000 Subject: [PATCH] Performance improvement for libpq: avoid calling malloc separately for each field of each tuple. Makes more difference than you'd think... --- src/interfaces/libpq/fe-exec.c | 335 ++++++++++++++++++++++--------- src/interfaces/libpq/libpq-int.h | 64 ++++-- 2 files changed, 283 insertions(+), 116 deletions(-) diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 82c697ef05..e6742a4769 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.69 1998/10/01 01:40:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.70 1998/11/18 00:47:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,10 +28,6 @@ #include -/* the rows array in a PGresGroup has to grow to accommodate the rows */ -/* returned. Each time, we grow by this much: */ -#define TUPARR_GROW_BY 100 - /* keep this in same order as ExecStatusType in libpq-fe.h */ const char *const pgresStatus[] = { "PGRES_EMPTY_QUERY", @@ -49,7 +45,6 @@ const char *const pgresStatus[] = { ((*(conn)->noticeHook) ((conn)->noticeArg, (message))) -static void freeTuple(PGresAttValue *tuple, int numAttributes); static int addTuple(PGresult *res, PGresAttValue *tup); static void parseInput(PGconn *conn); static int getRowDescriptions(PGconn *conn); @@ -58,6 +53,63 @@ static int getNotify(PGconn *conn); static int getNotice(PGconn *conn); +/* ---------------- + * Space management for PGresult. + * + * Formerly, libpq did a separate malloc() for each field of each tuple + * returned by a query. This was remarkably expensive --- malloc/free + * consumed a sizable part of the application's runtime. And there is + * no real need to keep track of the fields separately, since they will + * all be freed together when the PGresult is released. So now, we grab + * large blocks of storage from malloc and allocate space for query data + * within these blocks, using a trivially simple allocator. This reduces + * the number of malloc/free calls dramatically, and it also avoids + * fragmentation of the malloc storage arena. + * The PGresult structure itself is still malloc'd separately. We could + * combine it with the first allocation block, but that would waste space + * for the common case that no extra storage is actually needed (that is, + * the SQL command did not return tuples). + * We also malloc the top-level array of tuple pointers separately, because + * we need to be able to enlarge it via realloc, and our trivial space + * allocator doesn't handle that effectively. (Too bad the FE/BE protocol + * doesn't tell us up front how many tuples will be returned.) + * All other subsidiary storage for a PGresult is kept in PGresult_data blocks + * of size PGRESULT_DATA_BLOCKSIZE. The overhead at the start of each block + * is just a link to the next one, if any. Free-space management info is + * kept in the owning PGresult. + * A query returning a small amount of data will thus require three malloc + * calls: one for the PGresult, one for the tuples pointer array, and one + * PGresult_data block. + * Only the most recently allocated PGresult_data block is a candidate to + * have more stuff added to it --- any extra space left over in older blocks + * is wasted. We could be smarter and search the whole chain, but the point + * here is to be simple and fast. Typical applications do not keep a PGresult + * around very long anyway, so some wasted space within one is not a problem. + * + * Tuning constants for the space allocator are: + * PGRESULT_DATA_BLOCKSIZE: size of a standard allocation block, in bytes + * PGRESULT_ALIGN_BOUNDARY: assumed alignment requirement for binary data + * PGRESULT_SEP_ALLOC_THRESHOLD: objects bigger than this are given separate + * blocks, instead of being crammed into a regular allocation block. + * Requirements for correct function are: + * PGRESULT_ALIGN_BOUNDARY >= sizeof(pointer) + * to ensure the initial pointer in a block is not overwritten. + * PGRESULT_ALIGN_BOUNDARY must be a multiple of the alignment requirements + * of all machine data types. + * PGRESULT_SEP_ALLOC_THRESHOLD + PGRESULT_ALIGN_BOUNDARY <= + * PGRESULT_DATA_BLOCKSIZE + * pqResultAlloc assumes an object smaller than the threshold will fit + * in a new block. + * The amount of space wasted at the end of a block could be as much as + * PGRESULT_SEP_ALLOC_THRESHOLD, so it doesn't pay to make that too large. + * ---------------- + */ + +#define PGRESULT_DATA_BLOCKSIZE 2048 +#define PGRESULT_ALIGN_BOUNDARY 16 /* 8 is probably enough, really */ +#define PGRESULT_SEP_ALLOC_THRESHOLD (PGRESULT_DATA_BLOCKSIZE / 2) + + /* * PQmakeEmptyPGresult * returns a newly allocated, initialized PGresult with given status. @@ -76,7 +128,7 @@ PQmakeEmptyPGresult(PGconn *conn, ExecStatusType status) result = (PGresult *) malloc(sizeof(PGresult)); - result->conn = conn; /* should go away eventually */ + result->conn = conn; /* might be NULL */ result->ntups = 0; result->numAttributes = 0; result->attDescs = NULL; @@ -86,6 +138,11 @@ PQmakeEmptyPGresult(PGconn *conn, ExecStatusType status) result->cmdStatus[0] = '\0'; result->binary = 0; result->errMsg = NULL; + result->null_field[0] = '\0'; + result->curBlock = NULL; + result->curOffset = 0; + result->spaceLeft = 0; + if (conn) /* consider copying conn's errorMessage */ { switch (status) @@ -105,6 +162,117 @@ PQmakeEmptyPGresult(PGconn *conn, ExecStatusType status) return result; } +/* + * pqResultAlloc - + * Allocate subsidiary storage for a PGresult. + * + * nBytes is the amount of space needed for the object. + * If isBinary is true, we assume that we need to align the object on + * a machine allocation boundary. + * If isBinary is false, we assume the object is a char string and can + * be allocated on any byte boundary. + */ +void * +pqResultAlloc(PGresult *res, int nBytes, int isBinary) +{ + char *space; + PGresult_data *block; + + if (! res) + return NULL; + + if (nBytes <= 0) + return res->null_field; + + /* If alignment is needed, round up the current position to an + * alignment boundary. + */ + if (isBinary) + { + int offset = res->curOffset % PGRESULT_ALIGN_BOUNDARY; + if (offset) + { + res->curOffset += PGRESULT_ALIGN_BOUNDARY - offset; + res->spaceLeft -= PGRESULT_ALIGN_BOUNDARY - offset; + } + } + + /* If there's enough space in the current block, no problem. */ + if (nBytes <= res->spaceLeft) + { + space = res->curBlock->space + res->curOffset; + res->curOffset += nBytes; + res->spaceLeft -= nBytes; + return space; + } + + /* If the requested object is very large, give it its own block; this + * avoids wasting what might be most of the current block to start a new + * block. (We'd have to special-case requests bigger than the block size + * anyway.) The object is always given binary alignment in this case. + */ + if (nBytes >= PGRESULT_SEP_ALLOC_THRESHOLD) + { + block = (PGresult_data *) malloc(nBytes + PGRESULT_ALIGN_BOUNDARY); + if (! block) + return NULL; + space = block->space + PGRESULT_ALIGN_BOUNDARY; + if (res->curBlock) + { + /* Tuck special block below the active block, so that we don't + * have to waste the free space in the active block. + */ + block->next = res->curBlock->next; + res->curBlock->next = block; + } + else + { + /* Must set up the new block as the first active block. */ + block->next = NULL; + res->curBlock = block; + res->spaceLeft = 0; /* be sure it's marked full */ + } + return space; + } + + /* Otherwise, start a new block. */ + block = (PGresult_data *) malloc(PGRESULT_DATA_BLOCKSIZE); + if (! block) + return NULL; + block->next = res->curBlock; + res->curBlock = block; + if (isBinary) + { + /* object needs full alignment */ + res->curOffset = PGRESULT_ALIGN_BOUNDARY; + res->spaceLeft = PGRESULT_DATA_BLOCKSIZE - PGRESULT_ALIGN_BOUNDARY; + } + else + { + /* we can cram it right after the overhead pointer */ + res->curOffset = sizeof(PGresult_data); + res->spaceLeft = PGRESULT_DATA_BLOCKSIZE - sizeof(PGresult_data); + } + + space = block->space + res->curOffset; + res->curOffset += nBytes; + res->spaceLeft -= nBytes; + return space; +} + +/* + * pqResultStrdup - + * Like strdup, but the space is subsidiary PGresult space. + */ +char * +pqResultStrdup(PGresult *res, const char *str) +{ + char *space = (char*) pqResultAlloc(res, strlen(str)+1, FALSE); + if (space) + strcpy(space, str); + return space; +} + /* * pqSetResultError - * assign a new error message to a PGresult @@ -114,11 +282,10 @@ pqSetResultError(PGresult *res, const char *msg) { if (!res) return; - if (res->errMsg) - free(res->errMsg); - res->errMsg = NULL; if (msg && *msg) - res->errMsg = strdup(msg); + res->errMsg = pqResultStrdup(res, msg); + else + res->errMsg = NULL; } /* @@ -128,58 +295,25 @@ pqSetResultError(PGresult *res, const char *msg) void PQclear(PGresult *res) { - int i; + PGresult_data *block; if (!res) return; - /* free all the rows */ - if (res->tuples) - { - for (i = 0; i < res->ntups; i++) - freeTuple(res->tuples[i], res->numAttributes); - free(res->tuples); - } - - /* free all the attributes */ - if (res->attDescs) - { - for (i = 0; i < res->numAttributes; i++) - { - if (res->attDescs[i].name) - free(res->attDescs[i].name); - } - free(res->attDescs); + /* Free all the subsidiary blocks */ + while ((block = res->curBlock) != NULL) { + res->curBlock = block->next; + free(block); } - /* free the error text */ - if (res->errMsg) - free(res->errMsg); + /* Free the top-level tuple pointer array */ + if (res->tuples) + free(res->tuples); - /* free the structure itself */ + /* Free the PGresult structure itself */ free(res); } -/* - * Free a single tuple structure. - */ - -static void -freeTuple(PGresAttValue *tuple, int numAttributes) -{ - int i; - - if (tuple) - { - for (i = 0; i < numAttributes; i++) - { - if (tuple[i].value) - free(tuple[i].value); - } - free(tuple); - } -} - /* * Handy subroutine to deallocate any partially constructed async result. */ @@ -187,13 +321,8 @@ freeTuple(PGresAttValue *tuple, int numAttributes) void pqClearAsyncResult(PGconn *conn) { - /* Get rid of incomplete result and any not-yet-added tuple */ if (conn->result) - { - if (conn->curTuple) - freeTuple(conn->curTuple, conn->result->numAttributes); PQclear(conn->result); - } conn->result = NULL; conn->curTuple = NULL; } @@ -201,7 +330,7 @@ pqClearAsyncResult(PGconn *conn) /* * addTuple - * add a row to the PGresult structure, growing it if necessary + * add a row pointer to the PGresult structure, growing it if necessary * Returns TRUE if OK, FALSE if not enough memory to add the row */ static int @@ -220,7 +349,7 @@ addTuple(PGresult *res, PGresAttValue *tup) * Note that the positions beyond res->ntups are garbage, not * necessarily NULL. */ - int newSize = res->tupArrSize + TUPARR_GROW_BY; + int newSize = (res->tupArrSize > 0) ? res->tupArrSize * 2 : 128; PGresAttValue ** newTuples = (PGresAttValue **) realloc(res->tuples, newSize * sizeof(PGresAttValue *)); if (! newTuples) @@ -564,7 +693,7 @@ getRowDescriptions(PGconn *conn) if (nfields > 0) { result->attDescs = (PGresAttDesc *) - malloc(nfields * sizeof(PGresAttDesc)); + pqResultAlloc(result, nfields * sizeof(PGresAttDesc), TRUE); MemSet((char *) result->attDescs, 0, nfields * sizeof(PGresAttDesc)); } @@ -574,7 +703,7 @@ getRowDescriptions(PGconn *conn) char typName[MAX_MESSAGE_LEN]; int typid; int typlen; - int atttypmod = -1; + int atttypmod; if (pqGets(typName, MAX_MESSAGE_LEN, conn) || pqGetInt(&typid, 4, conn) || @@ -594,7 +723,7 @@ getRowDescriptions(PGconn *conn) */ if (typlen == 0xFFFF) typlen = -1; - result->attDescs[i].name = strdup(typName); + result->attDescs[i].name = pqResultStrdup(result, typName); result->attDescs[i].typid = typid; result->attDescs[i].typlen = typlen; result->attDescs[i].atttypmod = atttypmod; @@ -618,7 +747,8 @@ getRowDescriptions(PGconn *conn) static int getAnotherTuple(PGconn *conn, int binary) { - int nfields = conn->result->numAttributes; + PGresult *result = conn->result; + int nfields = result->numAttributes; PGresAttValue *tup; char bitmap[MAX_FIELDS]; /* the backend sends us a bitmap * of which attributes are null */ @@ -629,13 +759,13 @@ getAnotherTuple(PGconn *conn, int binary) int bitcnt; /* number of bits examined in current byte */ int vlen; /* length of the current field value */ - conn->result->binary = binary; + result->binary = binary; /* Allocate tuple space if first time for this data message */ if (conn->curTuple == NULL) { conn->curTuple = (PGresAttValue *) - malloc(nfields * sizeof(PGresAttValue)); + pqResultAlloc(result, nfields * sizeof(PGresAttValue), TRUE); if (conn->curTuple == NULL) goto outOfMemory; MemSet((char *) conn->curTuple, 0, nfields * sizeof(PGresAttValue)); @@ -670,12 +800,7 @@ getAnotherTuple(PGconn *conn, int binary) if (!(bmap & 0200)) { /* if the field value is absent, make it a null string */ - if (tup[i].value == NULL) - { - tup[i].value = strdup(""); - if (tup[i].value == NULL) - goto outOfMemory; - } + tup[i].value = result->null_field; tup[i].len = NULL_LEN; } else @@ -689,7 +814,7 @@ getAnotherTuple(PGconn *conn, int binary) vlen = 0; if (tup[i].value == NULL) { - tup[i].value = (char *) malloc(vlen + 1); + tup[i].value = (char *) pqResultAlloc(result, vlen+1, binary); if (tup[i].value == NULL) goto outOfMemory; } @@ -714,14 +839,8 @@ getAnotherTuple(PGconn *conn, int binary) } /* Success! Store the completed tuple in the result */ - if (! addTuple(conn->result, tup)) - { - /* Oops, not enough memory to add the tuple to conn->result, - * so must free it ourselves... - */ - freeTuple(tup, nfields); + if (! addTuple(result, tup)) goto outOfMemory; - } /* and reset for a new message */ conn->curTuple = NULL; return 0; @@ -1437,10 +1556,13 @@ check_field_number(const char *routineName, PGresult *res, int field_num) return FALSE; /* no way to display error message... */ if (field_num < 0 || field_num >= res->numAttributes) { - sprintf(res->conn->errorMessage, - "%s: ERROR! field number %d is out of range 0..%d\n", - routineName, field_num, res->numAttributes - 1); - DONOTICE(res->conn, res->conn->errorMessage); + if (res->conn) + { + sprintf(res->conn->errorMessage, + "%s: ERROR! field number %d is out of range 0..%d\n", + routineName, field_num, res->numAttributes - 1); + DONOTICE(res->conn, res->conn->errorMessage); + } return FALSE; } return TRUE; @@ -1454,18 +1576,24 @@ check_tuple_field_number(const char *routineName, PGresult *res, return FALSE; /* no way to display error message... */ if (tup_num < 0 || tup_num >= res->ntups) { - sprintf(res->conn->errorMessage, - "%s: ERROR! tuple number %d is out of range 0..%d\n", - routineName, tup_num, res->ntups - 1); - DONOTICE(res->conn, res->conn->errorMessage); + if (res->conn) + { + sprintf(res->conn->errorMessage, + "%s: ERROR! tuple number %d is out of range 0..%d\n", + routineName, tup_num, res->ntups - 1); + DONOTICE(res->conn, res->conn->errorMessage); + } return FALSE; } if (field_num < 0 || field_num >= res->numAttributes) { - sprintf(res->conn->errorMessage, - "%s: ERROR! field number %d is out of range 0..%d\n", - routineName, field_num, res->numAttributes - 1); - DONOTICE(res->conn, res->conn->errorMessage); + if (res->conn) + { + sprintf(res->conn->errorMessage, + "%s: ERROR! field number %d is out of range 0..%d\n", + routineName, field_num, res->numAttributes - 1); + DONOTICE(res->conn, res->conn->errorMessage); + } return FALSE; } return TRUE; @@ -1635,10 +1763,13 @@ PQcmdTuples(PGresult *res) if (*p == 0) { - sprintf(res->conn->errorMessage, - "PQcmdTuples (%s) -- bad input from server\n", - res->cmdStatus); - DONOTICE(res->conn, res->conn->errorMessage); + if (res->conn) + { + sprintf(res->conn->errorMessage, + "PQcmdTuples (%s) -- bad input from server\n", + res->cmdStatus); + DONOTICE(res->conn, res->conn->errorMessage); + } return ""; } p++; @@ -1648,9 +1779,12 @@ PQcmdTuples(PGresult *res) p++; /* INSERT: skip oid */ if (*p == 0) { - sprintf(res->conn->errorMessage, - "PQcmdTuples (INSERT) -- there's no # of tuples\n"); - DONOTICE(res->conn, res->conn->errorMessage); + if (res->conn) + { + sprintf(res->conn->errorMessage, + "PQcmdTuples (INSERT) -- there's no # of tuples\n"); + DONOTICE(res->conn, res->conn->errorMessage); + } return ""; } p++; @@ -1680,7 +1814,8 @@ PQgetvalue(PGresult *res, int tup_num, int field_num) /* PQgetlength: returns the length of a field value in bytes. If res is binary, i.e. a result of a binary portal, then the length returned does - NOT include the size field of the varlena. + NOT include the size field of the varlena. (The data returned + by PQgetvalue doesn't either.) */ int PQgetlength(PGresult *res, int tup_num, int field_num) diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index e05ea4bebf..76d21b07c9 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -11,7 +11,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: libpq-int.h,v 1.4 1998/10/01 01:40:25 tgl Exp $ + * $Id: libpq-int.h,v 1.5 1998/11/18 00:47:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -49,12 +49,29 @@ #define ERROR_MSG_LENGTH 4096 #define CMDSTATUS_LEN 40 -/* PGresult and the subsidiary types PGresAttDesc, PGresAttValue +/* + * PGresult and the subsidiary types PGresAttDesc, PGresAttValue * represent the result of a query (or more precisely, of a single SQL * command --- a query string given to PQexec can contain multiple commands). * Note we assume that a single command can return at most one tuple group, * hence there is no need for multiple descriptor sets. */ + +/* Subsidiary-storage management structure for PGresult. + * See space management routines in fe-exec.c for details. + * Note that space[k] refers to the k'th byte starting from the physical + * head of the block. + */ + typedef union pgresult_data PGresult_data; + + union pgresult_data + { + PGresult_data *next; /* link to next block, or NULL */ + char space[1]; /* dummy for accessing block as bytes */ + }; + +/* Data about a single attribute (column) of a query result */ + typedef struct pgresAttDesc { char *name; /* type name */ @@ -63,11 +80,20 @@ int atttypmod; /* type-specific modifier info */ } PGresAttDesc; -/* use char* for Attribute values, - ASCII tuples are guaranteed to be null-terminated - For binary tuples, the first four bytes of the value is the size, - and the bytes afterwards are the value. The binary value is - not guaranteed to be null-terminated. In fact, it can have embedded nulls +/* Data for a single attribute of a single tuple */ + +/* We use char* for Attribute values. + The value pointer always points to a null-terminated area; we add a + null (zero) byte after whatever the backend sends us. This is only + particularly useful for ASCII tuples ... with a binary value, the + value might have embedded nulls, so the application can't use C string + operators on it. But we add a null anyway for consistency. + Note that the value itself does not contain a length word. + + A NULL attribute is a special case in two ways: its len field is NULL_LEN + and its value field points to null_field in the owning PGresult. All the + NULL attributes in a query result point to the same place (there's no need + to store a null string separately for each one). */ #define NULL_LEN (-1) /* pg_result len for NULL value */ @@ -75,7 +101,7 @@ typedef struct pgresAttValue { int len; /* length in bytes of the value */ - char *value; /* actual value */ + char *value; /* actual value, plus terminating zero byte */ } PGresAttValue; struct pg_result @@ -91,15 +117,19 @@ * last insert query */ int binary; /* binary tuple values if binary == 1, * otherwise ASCII */ - /* NOTE: conn is kept here only for the temporary convenience of - * applications that rely on it being here. It will go away in a - * future release, because relying on it is a bad idea --- what if - * the PGresult has outlived the PGconn? About the only thing it was - * really good for was fetching the errorMessage, and we stash that - * here now anyway. - */ - PGconn *conn; /* connection we did the query on */ + PGconn *conn; /* connection we did the query on, if any */ char *errMsg; /* error message, or NULL if no error */ + + /* All NULL attributes in the query result point to this null string */ + char null_field[1]; + + /* Space management information. Note that attDescs and errMsg, + * if not null, point into allocated blocks. But tuples points + * to a separately malloc'd block, so that we can realloc it. + */ + PGresult_data *curBlock; /* most recently allocated block */ + int curOffset; /* start offset of free space in block */ + int spaceLeft; /* number of free bytes remaining in block */ }; /* PGAsyncStatusType defines the state of the query-execution state machine */ @@ -202,6 +232,8 @@ extern int pqPacketSend(PGconn *conn, const char *buf, size_t len); /* === in fe-exec.c === */ extern void pqSetResultError(PGresult *res, const char *msg); +extern void * pqResultAlloc(PGresult *res, int nBytes, int isBinary); +extern char * pqResultStrdup(PGresult *res, const char *str); extern void pqClearAsyncResult(PGconn *conn); /* === in fe-misc.c === */ -- 2.40.0