* by themselves, and secondly ctids for row-marks.
*
* To eliminate duplicates, we build a bitmapset of the needed columns, and
- * then build an array of the columns included in the hashtable. Note that
- * the array is preserved over ExecReScanAgg, so we allocate it in the
- * per-query context (unlike the hash table itself).
+ * then build an array of the columns included in the hashtable. We might
+ * still have duplicates if the passed-in grpColIdx has them, which can happen
+ * in edge cases from semijoins/DISTINCT; these can't always be removed,
+ * because it's not certain that the duplicate columns will use the same
+ * hash function.
+ *
+ * Note that the array is preserved over ExecReScanAgg, so we allocate it in
+ * the per-query context (unlike the hash table itself).
*/
static void
find_hash_columns(AggState *aggstate)
AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
List *hashTlist = NIL;
TupleDesc hashDesc;
+ int maxCols;
int i;
perhash->largestGrpColIdx = 0;
colnos = bms_del_member(colnos, attnum);
}
}
- /* Add in all the grouping columns */
- for (i = 0; i < perhash->numCols; i++)
- colnos = bms_add_member(colnos, grpColIdx[i]);
+
+ /*
+ * Compute an upper bound on the number of input columns, allowing for
+ * possible duplicates in the grpColIdx array, which can occur in some
+ * edge cases where HashAggregate was generated as part of a semijoin or
+ * a DISTINCT.
+ */
+ maxCols = bms_num_members(colnos) + perhash->numCols;
perhash->hashGrpColIdxInput =
- palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+ palloc(maxCols * sizeof(AttrNumber));
perhash->hashGrpColIdxHash =
palloc(perhash->numCols * sizeof(AttrNumber));
+ /* Add all the grouping columns to colnos */
+ for (i = 0; i < perhash->numCols; i++)
+ colnos = bms_add_member(colnos, grpColIdx[i]);
+
/*
* First build mapping for columns directly hashed. These are the
* first, because they'll be accessed when computing hash values and
ba | 0 | 1
(2 rows)
+-- Make sure that generation of HashAggregate for uniqification purposes
+-- does not lead to array overflow due to unexpected duplicate hash keys
+-- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com
+explain (costs off)
+ select 1 from tenk1
+ where (hundred, thousand) in (select twothousand, twothousand from onek);
+ QUERY PLAN
+-------------------------------------------------------------
+ Hash Join
+ Hash Cond: (tenk1.hundred = onek.twothousand)
+ -> Seq Scan on tenk1
+ Filter: (hundred = thousand)
+ -> Hash
+ -> HashAggregate
+ Group Key: onek.twothousand, onek.twothousand
+ -> Seq Scan on onek
+(8 rows)
+
select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*)
from unnest(array['a','b']) u(v)
group by v||'a' order by 1;
+
+-- Make sure that generation of HashAggregate for uniqification purposes
+-- does not lead to array overflow due to unexpected duplicate hash keys
+-- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com
+explain (costs off)
+ select 1 from tenk1
+ where (hundred, thousand) in (select twothousand, twothousand from onek);