]> granicus.if.org Git - postgresql/commitdiff
Added another single byte oriented decompressor, useful for
authorJan Wieck <JanWieck@Yahoo.com>
Thu, 25 Nov 1999 01:28:07 +0000 (01:28 +0000)
committerJan Wieck <JanWieck@Yahoo.com>
Thu, 25 Nov 1999 01:28:07 +0000 (01:28 +0000)
comparison functions.

Added all lztext comparison functions, operators and a default
operator class for nbtree on lztext.

Jan

src/backend/utils/adt/lztext.c
src/backend/utils/adt/pg_lzcompress.c
src/include/catalog/pg_amop.h
src/include/catalog/pg_amproc.h
src/include/catalog/pg_opclass.h
src/include/catalog/pg_operator.h
src/include/catalog/pg_proc.h
src/include/utils/builtins.h
src/include/utils/pg_lzcompress.h

index 6ff128515b614b7f0b506e5b72c1a956f86bfb06..ef31094fb51323152233fd704e0877e4b4c3e055 100644 (file)
@@ -1,7 +1,7 @@
 /* ----------
  * lztext.c -
  *
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/lztext.c,v 1.3 1999/11/24 03:45:12 ishii Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/lztext.c,v 1.4 1999/11/25 01:28:04 wieck Exp $
  *
  *     Text type with internal LZ compressed representation. Uses the
  *     standard PostgreSQL compression method.
@@ -290,3 +290,146 @@ lztext_text(lztext *lz)
 }
 
 
+/* ----------
+ * lztext_cmp -
+ *
+ *             Comparison function for two lztext datums.
+ *
+ *             With USE_LOCALE both values are turned into C strings with
+ *             lztextout() and compared using strcoll(). Otherwise both
+ *             values are read byte by byte through the PGLZ_DecompState
+ *             interface and the loop stops at the first difference, where
+ *             a value that ends first sorts before the longer one.
+ *
+ *             Returns -1, 0 or 1. If either argument is NULL the result
+ *             is 0.
+ * ----------
+ */
+int32
+lztext_cmp(lztext *lz1, lztext *lz2)
+{
+#ifdef USE_LOCALE
+
+       char   *cp1;
+       char   *cp2;
+       int             result;
+
+       if (lz1 == NULL || lz2 == NULL)
+               return (int32)0;
+
+       cp1 = lztextout(lz1);
+       cp2 = lztextout(lz2);
+
+       result = strcoll(cp1, cp2);
+
+       pfree(cp1);
+       pfree(cp2);
+
+       /* strcoll() only defines the sign of its result; normalize it */
+       return (int32)((result > 0) - (result < 0));
+
+#else /* !USE_LOCALE */
+
+       PGLZ_DecompState        ds1;
+       PGLZ_DecompState        ds2;
+       int                                     c1;
+       int                                     c2;
+       int32                           result = (int32)0;
+
+       if (lz1 == NULL || lz2 == NULL)
+               return (int32)0;
+
+       pglz_decomp_init(&ds1, lz1);
+       pglz_decomp_init(&ds2, lz2);
+
+       for(;;)
+       {
+               c1 = pglz_decomp_getchar(&ds1);
+               c2 = pglz_decomp_getchar(&ds2);
+
+               if (c1 == EOF || c2 == EOF)
+               {
+                       /* ----------
+                        * At least one side is exhausted. The side that still
+                        * has data is the greater one; if both ended at the
+                        * same time the values are equal and result stays 0.
+                        * ----------
+                        */
+                       if (c1 != EOF)
+                               result = (int32)1;
+                       else if (c2 != EOF)
+                               result = (int32)-1;
+                       break;
+               }
+
+               if (c1 != c2)
+               {
+                       /* first differing byte decides, reported as -1 or 1 */
+                       result = (int32)((c1 > c2) ? 1 : -1);
+                       break;
+               }
+       }
+
+       pglz_decomp_end(&ds1);
+       pglz_decomp_end(&ds2);
+
+       return result;
+
+#endif /* USE_LOCALE */
+}
+
+
+/* ----------
+ * lztext_eq, lztext_ne, lztext_gt, lztext_ge, lztext_lt, lztext_le -
+ *
+ *             Boolean operator functions (=, !=, >, >=, < and <=) for two
+ *             lztext datums. Each one tests the result of lztext_cmp()
+ *             against zero and returns false if either argument is NULL.
+ * ----------
+ */
+bool
+lztext_eq(lztext *lz1, lztext *lz2)
+{
+       if (lz1 != NULL && lz2 != NULL)
+               return (bool)(lztext_cmp(lz1, lz2) == 0);
+
+       return false;
+}
+
+
+bool
+lztext_ne(lztext *lz1, lztext *lz2)
+{
+       /* "not equal" is false as well when an argument is NULL */
+       if (lz1 != NULL && lz2 != NULL)
+               return (bool)(lztext_cmp(lz1, lz2) != 0);
+
+       return false;
+}
+
+
+bool
+lztext_gt(lztext *lz1, lztext *lz2)
+{
+       /* true only if both arguments are present and lz1 sorts after lz2 */
+       if (lz1 != NULL && lz2 != NULL)
+               return (bool)(lztext_cmp(lz1, lz2) > 0);
+
+       return false;
+}
+
+
+bool
+lztext_ge(lztext *lz1, lztext *lz2)
+{
+       /* true only if both arguments are present and lz1 does not sort before lz2 */
+       if (lz1 != NULL && lz2 != NULL)
+               return (bool)(lztext_cmp(lz1, lz2) >= 0);
+
+       return false;
+}
+
+
+bool
+lztext_lt(lztext *lz1, lztext *lz2)
+{
+       /* true only if both arguments are present and lz1 sorts before lz2 */
+       if (lz1 != NULL && lz2 != NULL)
+               return (bool)(lztext_cmp(lz1, lz2) < 0);
+
+       return false;
+}
+
+
+bool
+lztext_le(lztext *lz1, lztext *lz2)
+{
+       /* true only if both arguments are present and lz1 does not sort after lz2 */
+       if (lz1 != NULL && lz2 != NULL)
+               return (bool)(lztext_cmp(lz1, lz2) <= 0);
+
+       return false;
+}
+
+
index b3eb5e2a013d2f0d0a5efa3ac5081d2103ff181d..c35568e9598116b29c1c98c953065ee395e893fd 100644 (file)
@@ -1,7 +1,7 @@
 /* ----------
  * pg_lzcompress.c -
  *
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.2 1999/11/17 22:18:45 wieck Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.3 1999/11/25 01:28:04 wieck Exp $
  *
  *             This is an implementation of LZ compression for PostgreSQL.
  *             It uses a simple history table and generates 2-3 byte tags
@@ -671,3 +671,167 @@ pglz_decompress (PGLZ_Header *source, char *dest)
 }
 
 
+/* ----------
+ * pglz_get_next_decomp_char_from_lzdata -
+ *
+ *             Reads the next character from a decompression state if the
+ *             input data to pglz_decomp_init() was in compressed format.
+ *
+ *             Every byte handed out is also stored in the state's temporary
+ *             buffer, because tags copy from output that was produced
+ *             earlier. When a new control byte is due and at least 256
+ *             bytes have been produced, the remaining input is decompressed
+ *             in one go and the state is switched over to
+ *             pglz_get_next_decomp_char_from_plain().
+ *
+ *             Returns the byte as a non-negative int, or EOF when the
+ *             input is exhausted.
+ * ----------
+ */
+int
+pglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate)
+{
+       unsigned char           retval;
+
+       if (dstate->tocopy > 0)
+       {
+               /* ----------
+                * Copy one byte from output to output until we did it
+                * for the length specified by the last tag. Return that
+                * byte.
+                * ----------
+                */
+               dstate->tocopy--;
+               return (int)(*(dstate->cp_out++) = *(dstate->cp_copy++));
+       }
+
+       if (dstate->ctrl_count == 0)
+       {
+               /* ----------
+                * Get the next control byte if we need to, but check
+                * for EOF before.
+                * ----------
+                */
+               if (dstate->cp_in == dstate->cp_end)
+               {
+                       return EOF;
+               }
+
+               /* ----------
+                * This decompression method saves time only, if we stop near
+                * the beginning of the data (maybe because we're called by a
+                * comparison function and a difference occurs early). Otherwise,
+                * all the checks, needed here, cause too much overhead.
+                *
+                * Thus we decompress the entire rest at once into the temporary
+                * buffer and change the decomp state to return the prepared
+                * data from the buffer by the more simple calls to
+                * pglz_get_next_decomp_char_from_plain().
+                * ----------
+                */
+               if (dstate->cp_out - dstate->temp_buf >= 256)
+               {
+                       unsigned char      *cp_in               = dstate->cp_in;
+                       unsigned char      *cp_out              = dstate->cp_out;
+                       unsigned char      *cp_end              = dstate->cp_end;
+                       unsigned char      *cp_copy;
+                       unsigned char           ctrl;
+                       int                                     off;
+                       int                                     len;
+                       int                                     i;
+
+                       while (cp_in < cp_end)
+                       {
+                               ctrl = *cp_in++;
+
+                               for (i = 0; i < 8; i++)
+                               {
+                                       if (cp_in == cp_end)
+                                               break;
+
+                                       if (ctrl & 0x01)
+                                       {
+                                               /* tag: 4 bit length, 12 bit offset, optional extra length byte */
+                                               len = (cp_in[0] & 0x0f) + 3;
+                                               off = ((cp_in[0] & 0xf0) << 4) | cp_in[1];
+                                               cp_in += 2;
+                                               if (len == 18)
+                                                       len += *cp_in++;
+
+                                               /* byte-wise copy, source and destination may overlap */
+                                               cp_copy = cp_out - off;
+                                               while(len--)
+                                                       *cp_out++ = *cp_copy++;
+                                       } else {
+                                               *cp_out++ = *cp_in++;
+                                       }
+                                       ctrl >>= 1;
+                               }
+                       }
+
+                       /* ----------
+                        * From now on the unread part of the temporary buffer
+                        * is the input of the plain reader. Let that function
+                        * hand out the first byte, so that EOF is returned
+                        * instead of reading behind the buffer if the rest of
+                        * the input did not produce any output.
+                        * ----------
+                        */
+                       dstate->cp_in           = dstate->cp_out;
+                       dstate->cp_end          = cp_out;
+                       dstate->next_char       = pglz_get_next_decomp_char_from_plain;
+
+                       return pglz_get_next_decomp_char_from_plain(dstate);
+               }
+
+               /* ----------
+                * Not yet, get next control byte into decomp state.
+                * ----------
+                */
+               dstate->ctrl = (unsigned char)(*(dstate->cp_in++));
+               dstate->ctrl_count = 8;
+       }
+
+       /* ----------
+        * Check for EOF in tag/literal byte data.
+        * ----------
+        */
+       if (dstate->cp_in == dstate->cp_end)
+       {
+               return EOF;
+       }
+
+       /* ----------
+        * Handle next control bit.
+        * ----------
+        */
+       dstate->ctrl_count--;
+       if (dstate->ctrl & 0x01)
+       {
+               /* ----------
+                * Bit is set, so tag is following. Setup copy information
+                * and do the copy for the first byte as above.
+                * ----------
+                */
+               int             off;
+
+               dstate->tocopy  = (dstate->cp_in[0] & 0x0f) + 3;
+               off                             = ((dstate->cp_in[0] & 0xf0) << 4) | dstate->cp_in[1];
+               dstate->cp_in   += 2;
+               if (dstate->tocopy == 18)
+                       dstate->tocopy += *(dstate->cp_in++);
+               dstate->cp_copy = dstate->cp_out - off;
+
+               dstate->tocopy--;
+               retval = (*(dstate->cp_out++) = *(dstate->cp_copy++));
+       } else {
+               /* ----------
+                * Bit is unset, so literal byte follows.
+                * ----------
+                */
+               retval = (*(dstate->cp_out++) = *(dstate->cp_in++));
+       }
+       dstate->ctrl >>= 1;
+
+       return (int)retval;
+}
+
+
+/* ----------
+ * pglz_get_next_decomp_char_from_plain -
+ *
+ *             Byte reader for data that needs no decompression: either the
+ *             input to pglz_decomp_init() was stored uncompressed, or
+ *             pglz_get_next_decomp_char_from_lzdata() has already expanded
+ *             the rest of the data. Hands out the bytes between cp_in and
+ *             cp_end one at a time, then EOF.
+ * ----------
+ */
+int
+pglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate)
+{
+       if (dstate->cp_in < dstate->cp_end)
+               return (int)(*(dstate->cp_in++));
+
+       return EOF;
+}
+
+
index 0cbaac532257baa0103903ed0b744596941e24e4..ef499d7afdc214fe4fcc7b3e7165a1b5a7b7f1d4 100644 (file)
@@ -7,7 +7,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_amop.h,v 1.24 1999/09/29 21:13:30 wieck Exp $
+ * $Id: pg_amop.h,v 1.25 1999/11/25 01:28:05 wieck Exp $
  *
  * NOTES
  *      the genbki.sh script reads this file and generates .bki
@@ -348,6 +348,16 @@ DATA(insert OID = 0 (  403 1768 1752 3 btreesel btreenpage ));
 DATA(insert OID = 0 (  403 1768 1757 4 btreesel btreenpage ));
 DATA(insert OID = 0 (  403 1768 1756 5 btreesel btreenpage ));
 
+/*
+ *     nbtree lztext
+ */
+
+DATA(insert OID = 0 (  403 1663 1659 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1663 1660 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1663 1657 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1663 1662 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1663 1661 5 btreesel btreenpage ));
+
 /*
  *     hash table _ops
  */
index 45d1b28587ad15ad3c328611b7624a1a74e54266..8adee475b0cf30fa014512d65cfc37b3ddba742e 100644 (file)
@@ -9,7 +9,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_amproc.h,v 1.15 1999/09/29 21:13:30 wieck Exp $
+ * $Id: pg_amproc.h,v 1.16 1999/11/25 01:28:05 wieck Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -97,6 +97,7 @@ DATA(insert OID = 0 (403 1313 1315 1));
 DATA(insert OID = 0 (403 810 836 1));
 DATA(insert OID = 0 (403 935 926 1));
 DATA(insert OID = 0 (403 1768 1769 1));
+DATA(insert OID = 0 (403 1663 1636 1));
 
 
 /* hash */
index 8b670e3e076efd3fb6ae479354f9969f50f96e10..0284d083352f205fe7004aa0443afdf1778084be 100644 (file)
@@ -7,7 +7,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_opclass.h,v 1.22 1999/11/23 04:47:39 momjian Exp $
+ * $Id: pg_opclass.h,v 1.23 1999/11/25 01:28:05 wieck Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -117,5 +117,7 @@ DATA(insert OID = 652  (    cidr_ops        650       ));
 DESCR("");
 DATA(insert OID = 1768 (       numeric_ops    1700       ));
 DESCR("");
+DATA(insert OID = 1663 (       lztext_ops    1625      ));
+DESCR("");
 
 #endif  /* PG_OPCLASS_H */
index 00d19560b9305cc2dd4033bc9cfaa744f39b146b..b572b0b01a39249a8522e93762f4293fd3a45e44 100644 (file)
@@ -7,7 +7,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_operator.h,v 1.61 1999/11/22 17:56:37 momjian Exp $
+ * $Id: pg_operator.h,v 1.62 1999/11/25 01:28:05 wieck Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -687,6 +687,14 @@ DATA(insert OID = 1761 (  "/"         PGUID 0 b t f 1700 1700 1700    0    0 0 0 numeric
 DATA(insert OID = 1762 (  "%"     PGUID 0 b t f 1700 1700 1700    0    0 0 0 numeric_mod - - ));
 DATA(insert OID = 1763 (  "@"     PGUID 0 l t f        0 1700 1700    0        0 0 0 numeric_abs - - ));
 
+/* LZTEXT type */
+DATA(insert OID = 1657 (  "="     PGUID 0 b t f 1625 1625       16 1657 1658 1659 1659 lztext_eq eqsel eqjoinsel ));
+DATA(insert OID = 1658 (  "<>"    PGUID 0 b t f 1625 1625       16 1658 1657 0 0 lztext_ne neqsel neqjoinsel ));
+DATA(insert OID = 1659 (  "<"     PGUID 0 b t f 1625 1625       16 1661 1662 0 0 lztext_lt intltsel intltjoinsel ));
+DATA(insert OID = 1660 (  "<="    PGUID 0 b t f 1625 1625       16 1662 1661 0 0 lztext_le intltsel intltjoinsel ));
+DATA(insert OID = 1661 (  ">"     PGUID 0 b t f 1625 1625       16 1659 1660 0 0 lztext_gt intgtsel intgtjoinsel ));
+DATA(insert OID = 1662 (  ">="    PGUID 0 b t f 1625 1625       16 1660 1659 0 0 lztext_ge intgtsel intgtjoinsel ));
+
 
 
 /*
index bb2a5b6dd7f36b66f3f5c4b0d64171aac7bfc87b..2cf19d5788c851d72a64d1d1aa7f0a0c3794b403 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.106 1999/11/17 21:21:50 wieck Exp $
+ * $Id: pg_proc.h,v 1.107 1999/11/25 01:28:05 wieck Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -2359,6 +2359,20 @@ DATA(insert OID = 1634 ( lztextoctetlen                  PGUID 11 f t t 1 f 23 "1625" 100 0
 DESCR("octet length");
 DATA(insert OID = 1635 ( octet_length                  PGUID 11 f t t 1 f 23 "1625" 100 0 1 0  lztextoctetlen - ));
 DESCR("octet length");
+DATA(insert OID = 1636 ( lztext_cmp                            PGUID 11 f t t 2 f 23 "1625 1625" 100 0 1 0  lztext_cmp - ));
+DESCR("compare lztext vs. lztext");
+DATA(insert OID = 1637 ( lztext_eq                             PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0  lztext_eq - ));
+DESCR("equal");
+DATA(insert OID = 1638 ( lztext_ne                             PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0  lztext_ne - ));
+DESCR("not equal");
+DATA(insert OID = 1639 ( lztext_gt                             PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0  lztext_gt - ));
+DESCR("greater-than");
+DATA(insert OID = 1654 ( lztext_ge                             PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0  lztext_ge - ));
+DESCR("greater-than-or-equal");
+DATA(insert OID = 1655 ( lztext_lt                             PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0  lztext_lt - ));
+DESCR("lower-than");
+DATA(insert OID = 1656 ( lztext_le                             PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0  lztext_le - ));
+DESCR("lower-than-or-equal");
 
 
 /*
index 1bf3273ca1316e70b060db14ac1ed4c25c770178..0b24dbab3fece409c31b899b815db07bd05fc8b0 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.90 1999/11/17 21:21:51 wieck Exp $
+ * $Id: builtins.h,v 1.91 1999/11/25 01:28:07 wieck Exp $
  *
  * NOTES
  *       This should normally only be included by fmgr.h.
@@ -635,5 +635,12 @@ text          *lztext_text(lztext *lz);
 lztext    *text_lztext(text *txt);
 int32          lztextlen(lztext *lz);
 int32          lztextoctetlen(lztext *lz);
+int32          lztext_cmp(lztext *lz1, lztext *lz2);
+bool           lztext_eq(lztext *lz1, lztext *lz2);
+bool           lztext_ne(lztext *lz1, lztext *lz2);
+bool           lztext_gt(lztext *lz1, lztext *lz2);
+bool           lztext_ge(lztext *lz1, lztext *lz2);
+bool           lztext_lt(lztext *lz1, lztext *lz2);
+bool           lztext_le(lztext *lz1, lztext *lz2);
 
 #endif  /* BUILTINS_H */
index 481fd24fab810b707c14803ce9d0e162694b302c..9e3d3f32945b6b166735b4670cde3aa2e105a747 100644 (file)
@@ -1,7 +1,7 @@
 /* ----------
  * pg_lzcompress.h -
  *
- * $Header: /cvsroot/pgsql/src/include/utils/pg_lzcompress.h,v 1.2 1999/11/17 22:18:46 wieck Exp $
+ * $Header: /cvsroot/pgsql/src/include/utils/pg_lzcompress.h,v 1.3 1999/11/25 01:28:07 wieck Exp $
  *
  *     Definitions for the builtin LZ compressor
  * ----------
@@ -110,6 +110,26 @@ typedef struct PGLZ_Strategy {
 } PGLZ_Strategy;
 
 
+/* ----------
+ * PGLZ_DecompState -
+ *
+ *             Decompression state variable for byte-per-byte decompression
+ *             using pglz_decomp_getchar() macro.
+ *
+ *             The state is set up by pglz_decomp_init() and released by
+ *             pglz_decomp_end(). The fields cp_out, cp_copy, tocopy,
+ *             ctrl_count and ctrl are used by the compressed-data reader
+ *             only.
+ * ----------
+ */
+typedef struct PGLZ_DecompState {
+       unsigned char      *temp_buf;  /* palloc()'d output buffer, NULL for plain input */
+       unsigned char      *cp_in;     /* next input byte to read */
+       unsigned char      *cp_end;    /* first byte behind the input */
+       unsigned char      *cp_out;    /* next free byte in temp_buf */
+       unsigned char      *cp_copy;   /* source position of the tag copy in progress */
+       int                                     (*next_char)(struct PGLZ_DecompState *dstate);  /* reader called by pglz_decomp_getchar() */
+       int                                     tocopy;     /* bytes still to copy for the current tag */
+       int                                     ctrl_count; /* unprocessed bits left in ctrl */
+       unsigned char           ctrl;   /* current control byte, next bit is 0x01 */
+} PGLZ_DecompState;
+
+
 /* ----------
  * The standard strategies
  *
@@ -139,6 +159,55 @@ extern PGLZ_Strategy       *PGLZ_strategy_allways;
 extern PGLZ_Strategy   *PGLZ_strategy_never;
 
 
+/* ----------
+ * pglz_decomp_getchar -
+ *
+ *             Get next character (or EOF) from decompressor by calling
+ *             the reader function stored in the state's next_char field.
+ *             The status variable must be initialized before and
+ *             deinitialized after decompression with the next two macros
+ *             below.
+ * ----------
+ */
+#define pglz_decomp_getchar(_ds)                                                                                       \
+       (((_ds)->next_char)(_ds))
+
+
+/* ----------
+ * pglz_decomp_init -
+ *
+ *             Initialize a decomp state from an input datum.
+ *
+ *             _ds is a pointer to the PGLZ_DecompState to set up, _lz a
+ *             pointer to the datum to read. For compressed data a temporary
+ *             buffer of PGLZ_RAW_SIZE() bytes is allocated with palloc()
+ *             and the compressed-data reader is installed; otherwise the
+ *             plain reader is installed and no buffer is allocated. All
+ *             fields of the state are initialized in both cases.
+ *
+ *             The body is wrapped in do { } while (0), so the macro has to
+ *             be used like a function call followed by a semicolon and is
+ *             safe inside if/else. Both arguments are evaluated more than
+ *             once.
+ * ----------
+ */
+#define pglz_decomp_init(_ds,_lz) do {                                                                         \
+               (_ds)->cp_in            = ((unsigned char *)(_lz))                                              \
+                                                                                       + sizeof(PGLZ_Header);                  \
+               (_ds)->cp_end           = (_ds)->cp_in + (_lz)->varsize                                 \
+                                                                                       - sizeof(PGLZ_Header);                  \
+               (_ds)->cp_copy          = NULL;                                                                                 \
+               (_ds)->tocopy           = 0;                                                                                    \
+               (_ds)->ctrl_count       = 0;                                                                                    \
+               (_ds)->ctrl                     = 0;                                                                                    \
+               if (PGLZ_IS_COMPRESSED((_lz))) {                                                                        \
+                       (_ds)->temp_buf         = (unsigned char *)                                                     \
+                                                                               palloc(PGLZ_RAW_SIZE((_lz)));           \
+                       (_ds)->cp_out           = (_ds)->temp_buf;                                                      \
+                       (_ds)->next_char        = pglz_get_next_decomp_char_from_lzdata;        \
+               } else {                                                                                                                        \
+                       (_ds)->temp_buf         = NULL;                                                                         \
+                       (_ds)->cp_out           = NULL;                                                                         \
+                       (_ds)->next_char        = pglz_get_next_decomp_char_from_plain;         \
+               }                                                                                                                                       \
+       } while (0)
+
+
+/* ----------
+ * pglz_decomp_end -
+ *
+ *             Deallocate resources after decompression.
+ *
+ *             Frees the temporary buffer of the state, if there is one,
+ *             and resets the pointer to NULL so that a second call on the
+ *             same state does not free the buffer again. Wrapped in
+ *             do { } while (0), so it has to be used like a function call
+ *             followed by a semicolon.
+ * ----------
+ */
+#define pglz_decomp_end(_ds) do {                                                                                      \
+               if ((_ds)->temp_buf != NULL) {                                                                          \
+                       pfree((void *)((_ds)->temp_buf));                                                               \
+                       (_ds)->temp_buf = NULL;                                                                                 \
+               }                                                                                                                                       \
+       } while (0)
+
+
 /* ----------
  * Global function declarations
  * ----------
@@ -148,5 +217,13 @@ int        pglz_compress (char *source, int32 slen, PGLZ_Header *dest,
 int pglz_decompress (PGLZ_Header *source, char *dest);
 
 
+/* ----------
+ * Functions used by pglz_decomp_getchar().
+ * Internal use only.
+ * ----------
+ */
+extern int pglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate);
+extern int pglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate);
+
 #endif /* _PG_LZCOMPRESS_H_ */