Improve bit perturbation in TupleHashTableHash.

author Andres Freund <andres@anarazel.de>

Mon, 29 Jan 2018 19:02:09 +0000 (11:02 -0800)

committer Andres Freund <andres@anarazel.de>

Mon, 29 Jan 2018 19:24:57 +0000 (11:24 -0800)
author Andres Freund <andres@anarazel.de>
Mon, 29 Jan 2018 19:02:09 +0000 (11:02 -0800)
committer Andres Freund <andres@anarazel.de>
Mon, 29 Jan 2018 19:24:57 +0000 (11:24 -0800)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c

index 07c8852fca8228c4e65e8ab4a3799fc7e90bd33f..cde5f676c1b33e41f6f579f8de1979719f3ea536 100644 (file)
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -23,6 +23,7 @@
  #include "executor/executor.h"
  #include "miscadmin.h"
  #include "utils/lsyscache.h"
+#include "utils/hashutils.h"
  #include "utils/memutils.h"
  
  static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
@@ -326,7 +327,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
          * underestimated.
          */
         if (use_variable_hash_iv)
-               hashtable->hash_iv = hash_uint32(ParallelWorkerNumber);
+               hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
         else
                 hashtable->hash_iv = 0;
  
@@ -510,7 +511,13 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
                 }
         }
  
-       return hashkey;
+       /*
+        * The way hashes are combined above, among each other and with the IV,
+        * doesn't lead to good bit perturbation. As the IV's goal is to
+        * achieve that, perform a round of hashing of the combined hash -
+        * resulting in near perfect perturbation.
+        */
+       return murmurhash32(hashkey);
  }
  
  /*
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out

index cbfdbfd8563f8704acf96755c0755446509bbfa3..d21a494a9ddfc2d2243154ce6bfc741e887d858b 100644 (file)
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -1183,29 +1183,33 @@ explain (costs off)
  -- simple rescan tests
  select a, b, sum(v.x)
    from (values (1),(2)) v(x), gstest_data(v.x)
- group by grouping sets (a,b);
+ group by grouping sets (a,b)
+ order by 1, 2, 3;
   a | b | sum 
  ---+---+-----
- 2 |   |   6
   1 |   |   3
+ 2 |   |   6
+   | 1 |   3
     | 2 |   3
     | 3 |   3
-   | 1 |   3
  (5 rows)
  
  explain (costs off)
    select a, b, sum(v.x)
      from (values (1),(2)) v(x), gstest_data(v.x)
-   group by grouping sets (a,b);
-                QUERY PLAN                
-------------------------------------------
- HashAggregate
-   Hash Key: gstest_data.a
-   Hash Key: gstest_data.b
-   ->  Nested Loop
-         ->  Values Scan on "*VALUES*"
-         ->  Function Scan on gstest_data
-(6 rows)
+   group by grouping sets (a,b)
+   order by 3, 1, 2;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Sort
+   Sort Key: (sum("*VALUES*".column1)), gstest_data.a, gstest_data.b
+   ->  HashAggregate
+         Hash Key: gstest_data.a
+         Hash Key: gstest_data.b
+         ->  Nested Loop
+               ->  Values Scan on "*VALUES*"
+               ->  Function Scan on gstest_data
+(8 rows)
  
  select *
    from (values (1),(2)) v(x),
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql

index b28d8217c127a20c9574ee93171d228254a07277..eb680286030d6d952bc7e88c891503dc54ee1a22 100644 (file)
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -342,12 +342,13 @@ explain (costs off)
  
  select a, b, sum(v.x)
    from (values (1),(2)) v(x), gstest_data(v.x)
- group by grouping sets (a,b);
+ group by grouping sets (a,b)
+ order by 1, 2, 3;
  explain (costs off)
    select a, b, sum(v.x)
      from (values (1),(2)) v(x), gstest_data(v.x)
-   group by grouping sets (a,b);
-
+   group by grouping sets (a,b)
+   order by 3, 1, 2;
  select *
    from (values (1),(2)) v(x),
         lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
author	Andres Freund <andres@anarazel.de>
	Mon, 29 Jan 2018 19:02:09 +0000 (11:02 -0800)
committer	Andres Freund <andres@anarazel.de>
	Mon, 29 Jan 2018 19:24:57 +0000 (11:24 -0800)
src/backend/executor/execGrouping.c		patch \| blob \| history
src/test/regress/expected/groupingsets.out		patch \| blob \| history
src/test/regress/sql/groupingsets.sql		patch \| blob \| history