]> granicus.if.org Git - postgresql/blobdiff - src/backend/utils/adt/uuid.c
Add support for EUI-64 MAC addresses as macaddr8
[postgresql] / src / backend / utils / adt / uuid.c
index 76a012ed57779e3e647d7ba7318432a05c5fad54..eaf2f8064d4c75d03612c40183fa87d45d008b10 100644 (file)
@@ -3,10 +3,10 @@
  * uuid.c
  *       Functions for the built-in type "uuid".
  *
- * Copyright (c) 2007-2008, PostgreSQL Global Development Group
+ * Copyright (c) 2007-2017, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/uuid.c,v 1.8 2008/11/03 22:14:40 petere Exp $
+ *       src/backend/utils/adt/uuid.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
+#include "port/pg_bswap.h"
 #include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/sortsupport.h"
 #include "utils/uuid.h"
 
-/* uuid size in bytes */
-#define UUID_LEN 16
-
-/* pg_uuid_t is declared to be struct pg_uuid_t in uuid.h */
-struct pg_uuid_t
+/* sortsupport for uuid */
+typedef struct
 {
-       unsigned char data[UUID_LEN];
-};
+       int64           input_count;    /* number of non-null values seen */
+       bool            estimating;             /* true if estimating cardinality */
+
+       hyperLogLogState abbr_card; /* cardinality estimator */
+} uuid_sortsupport_state;
 
 static void string_to_uuid(const char *source, pg_uuid_t *uuid);
 static int     uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
+static int     uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static int     uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
 
 Datum
 uuid_in(PG_FUNCTION_ARGS)
@@ -83,12 +91,13 @@ static void
 string_to_uuid(const char *source, pg_uuid_t *uuid)
 {
        const char *src = source;
-       int     i, braces = 0;
+       bool            braces = false;
+       int                     i;
 
        if (src[0] == '{')
        {
-               ++src;
-               braces = 1;
+               src++;
+               braces = true;
        }
 
        for (i = 0; i < UUID_LEN; i++)
@@ -111,9 +120,9 @@ string_to_uuid(const char *source, pg_uuid_t *uuid)
 
        if (braces)
        {
-               if (*src!= '}')
+               if (*src != '}')
                        goto syntax_error;
-               ++src;
+               src++;
        }
 
        if (*src != '\0')
@@ -124,8 +133,8 @@ string_to_uuid(const char *source, pg_uuid_t *uuid)
 syntax_error:
        ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                        errmsg("invalid input syntax for uuid: \"%s\"",
-                                       source)));
+                        errmsg("invalid input syntax for type %s: \"%s\"",
+                                       "uuid", source)));
 }
 
 Datum
@@ -221,6 +230,176 @@ uuid_cmp(PG_FUNCTION_ARGS)
        PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
 }
 
+/*
+ * Sort support strategy routine
+ */
+Datum
+uuid_sortsupport(PG_FUNCTION_ARGS)
+{
+       SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+       ssup->comparator = uuid_fast_cmp;
+       ssup->ssup_extra = NULL;
+
+       if (ssup->abbreviate)
+       {
+               uuid_sortsupport_state *uss;
+               MemoryContext oldcontext;
+
+               oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+               uss = palloc(sizeof(uuid_sortsupport_state));
+               uss->input_count = 0;
+               uss->estimating = true;
+               initHyperLogLog(&uss->abbr_card, 10);
+
+               ssup->ssup_extra = uss;
+
+               ssup->comparator = uuid_cmp_abbrev;
+               ssup->abbrev_converter = uuid_abbrev_convert;
+               ssup->abbrev_abort = uuid_abbrev_abort;
+               ssup->abbrev_full_comparator = uuid_fast_cmp;
+
+               MemoryContextSwitchTo(oldcontext);
+       }
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport comparison func
+ */
+static int
+uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+       pg_uuid_t  *arg1 = DatumGetUUIDP(x);
+       pg_uuid_t  *arg2 = DatumGetUUIDP(y);
+
+       return uuid_internal_cmp(arg1, arg2);
+}
+
+/*
+ * Abbreviated key comparison func
+ */
+static int
+uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+       if (x > y)
+               return 1;
+       else if (x == y)
+               return 0;
+       else
+               return -1;
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within authoritative uuid comparator.
+ */
+static bool
+uuid_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+       uuid_sortsupport_state *uss = ssup->ssup_extra;
+       double          abbr_card;
+
+       if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+               return false;
+
+       abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+       /*
+        * If we have >100k distinct values, then even if we were sorting many
+        * billion rows we'd likely still break even, and the penalty of undoing
+        * that many rows of abbrevs would probably not be worth it.  Stop even
+        * counting at that point.
+        */
+       if (abbr_card > 100000.0)
+       {
+#ifdef TRACE_SORT
+               if (trace_sort)
+                       elog(LOG,
+                                "uuid_abbrev: estimation ends at cardinality %f"
+                                " after " INT64_FORMAT " values (%d rows)",
+                                abbr_card, uss->input_count, memtupcount);
+#endif
+               uss->estimating = false;
+               return false;
+       }
+
+       /*
+        * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
+        * fudge factor allows us to abort earlier on genuinely pathological data
+        * where we've had exactly one abbreviated value in the first 2k
+        * (non-null) rows.
+        */
+       if (abbr_card < uss->input_count / 2000.0 + 0.5)
+       {
+#ifdef TRACE_SORT
+               if (trace_sort)
+                       elog(LOG,
+                                "uuid_abbrev: aborting abbreviation at cardinality %f"
+                          " below threshold %f after " INT64_FORMAT " values (%d rows)",
+                                abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
+                                memtupcount);
+#endif
+               return true;
+       }
+
+#ifdef TRACE_SORT
+       if (trace_sort)
+               elog(LOG,
+                        "uuid_abbrev: cardinality %f after " INT64_FORMAT
+                        " values (%d rows)", abbr_card, uss->input_count, memtupcount);
+#endif
+
+       return false;
+}
+
+/*
+ * Conversion routine for sortsupport.  Converts original uuid representation
+ * to abbreviated key representation.  Our encoding strategy is simple -- pack
+ * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
+ * machines, the bytes are stored in reverse order), and treat it as an
+ * unsigned integer.
+ */
+static Datum
+uuid_abbrev_convert(Datum original, SortSupport ssup)
+{
+       uuid_sortsupport_state *uss = ssup->ssup_extra;
+       pg_uuid_t  *authoritative = DatumGetUUIDP(original);
+       Datum           res;
+
+       memcpy(&res, authoritative->data, sizeof(Datum));
+       uss->input_count += 1;
+
+       if (uss->estimating)
+       {
+               uint32          tmp;
+
+#if SIZEOF_DATUM == 8
+               tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
+#else                                                  /* SIZEOF_DATUM != 8 */
+               tmp = (uint32) res;
+#endif
+
+               addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+       }
+
+       /*
+        * Byteswap on little-endian machines.
+        *
+        * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
+        * comparator) works correctly on all platforms.  If we didn't do this,
+        * the comparator would have to call memcmp() with a pair of pointers to
+        * the first byte of each abbreviated key, which is slower.
+        */
+       res = DatumBigEndianToNative(res);
+
+       return res;
+}
+
 /* hash index support */
 Datum
 uuid_hash(PG_FUNCTION_ARGS)