]> granicus.if.org Git - postgresql/commitdiff
I found the libpq function PQunescapeBytea a little slow. It was taking a
authorBruce Momjian <bruce@momjian.us>
Thu, 12 Jun 2003 01:17:19 +0000 (01:17 +0000)
committerBruce Momjian <bruce@momjian.us>
Thu, 12 Jun 2003 01:17:19 +0000 (01:17 +0000)
minute and a half to decode a 500Kb bytea value on a fairly fast machine. I think the
culprit is sscanf.

I attach a patch that replaces the function with one used to perform the same
task in pyPgSQL (a Python interface to PostgreSQL). This code was written by
Billy Allie, author of pyPgSQL. I've changed a few variable names to match
those in the original code and removed a bit of Pythonness.

Billy has kindly looked at the code and points out that it is slightly
stricter than the original implementation: if it encounters an invalid
bytea escape such as '\12C', it drops the backslash and outputs '12C'.

The code is licensed by the author under a BSD license.

I've performed limited testing of the function by putting JPEGs into
PostgreSQL, extracting them using the new function and diffing
against the original files.

The new function is significantly faster on my machine with the JPEGs being
decoded in less than a second. I attach a modified libpq example program that
I used for my testing.

Ben Lamb.

src/interfaces/libpq/fe-exec.c

index 0ea46ca2894aa4ad3f235887cfb9725fb440fdb0..45db359bde18ed557029c5dd27d78504c4b7f04e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.137 2003/06/08 17:43:00 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.138 2003/06/12 01:17:19 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1690,6 +1690,8 @@ PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
        return result;
 }
 
+#define VAL(CH) ((CH) - '0')
+
 /*
  *             PQunescapeBytea - converts the null terminated string representation
  *             of a bytea, strtext, into binary, filling a buffer. It returns a
@@ -1697,99 +1699,64 @@ PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
  *             buffer in retbuflen. The pointer may subsequently be used as an
  *             argument to the function free(3). It is the reverse of PQescapeBytea.
  *
- *             The following transformations are reversed:
- *             '\0' == ASCII  0 == \000
- *             '\'' == ASCII 39 == \'
- *             '\\' == ASCII 92 == \\
+ *             The following transformations are made:
+ *             \'   == ASCII 39 == '
+ *             \\   == ASCII 92 == \
+ *             \ooo == a byte whose value = ooo (ooo is an octal number)
+ *             \x   == x (x is any character not matched by the above transformations)
  *
- *             States:
- *             0       normal          0->1->2->3->4
- *             1       \                          1->5
- *             2       \0                         1->6
- *             3       \00
- *             4       \000
- *             5       \'
- *             6       \\
  */
 unsigned char *
 PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen)
 {
-       size_t          buflen;
-       unsigned char *buffer,
-                          *bp;
-       const unsigned char *sp;
-       unsigned int state = 0;
+       size_t strtextlen, buflen;
+       unsigned char *buffer, *tmpbuf;
+       int i, j, byte;
 
-       if (strtext == NULL)
+       if (strtext == NULL) {
                return NULL;
-       buflen = strlen(strtext);       /* will shrink, also we discover if
-                                                                * strtext */
-       buffer = (unsigned char *) malloc(buflen);      /* isn't NULL terminated */
+       }
+
+       strtextlen = strlen(strtext);   /* will shrink, also we discover if
+                                                                        * strtext isn't NULL terminated */
+       buffer = (unsigned char *)malloc(strtextlen);
        if (buffer == NULL)
                return NULL;
-       for (bp = buffer, sp = strtext; *sp != '\0'; bp++, sp++)
+
+       for (i = j = buflen = 0; i < strtextlen;)
        {
-               switch (state)
+               switch (strtext[i])
                {
-                       case 0:
-                               if (*sp == '\\')
-                                       state = 1;
-                               *bp = *sp;
-                               break;
-                       case 1:
-                               if (*sp == '\'')        /* state=5 */
-                               {                               /* replace \' with 39 */
-                                       bp--;
-                                       *bp = '\'';
-                                       buflen--;
-                                       state = 0;
-                               }
-                               else if (*sp == '\\')   /* state=6 */
-                               {                               /* replace \\ with 92 */
-                                       bp--;
-                                       *bp = '\\';
-                                       buflen--;
-                                       state = 0;
-                               }
+                       case '\\':
+                               i++;
+                               if (strtext[i] == '\\')
+                                       buffer[j++] = strtext[i++];
                                else
                                {
-                                       if (isdigit(*sp))
-                                               state = 2;
-                                       else
-                                               state = 0;
-                                       *bp = *sp;
+                                       if ((isdigit(strtext[i]))   &&
+                                               (isdigit(strtext[i+1])) &&
+                                               (isdigit(strtext[i+2])))
+                                       {
+                                               byte = VAL(strtext[i++]);
+                                               byte = (byte << 3) + VAL(strtext[i++]);
+                                               buffer[j++] = (byte << 3) + VAL(strtext[i++]);
+                                       }
                                }
                                break;
-                       case 2:
-                               if (isdigit(*sp))
-                                       state = 3;
-                               else
-                                       state = 0;
-                               *bp = *sp;
-                               break;
-                       case 3:
-                               if (isdigit(*sp))               /* state=4 */
-                               {
-                                       int                     v;
 
-                                       bp -= 3;
-                                       sscanf(sp - 2, "%03o", &v);
-                                       *bp = v;
-                                       buflen -= 3;
-                                       state = 0;
-                               }
-                               else
-                               {
-                                       *bp = *sp;
-                                       state = 0;
-                               }
-                               break;
+                       default:
+                               buffer[j++] = strtext[i++];
                }
        }
-       buffer = realloc(buffer, buflen);
-       if (buffer == NULL)
-               return NULL;
+       buflen = j; /* buflen is the length of the unquoted data */
+       tmpbuf = realloc(buffer, buflen);
+
+       if (!tmpbuf)
+       {
+               free(buffer);
+               return 0;
+       }
 
        *retbuflen = buflen;
-       return buffer;
+       return tmpbuf;
 }