From 948c97958bf37adb2a9c2d6d92c255abfc7499ba Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Tue, 19 Jan 2016 17:40:15 -0300 Subject: [PATCH] Add two HyperLogLog functions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit New functions initHyperLogLogError() and freeHyperLogLog() simplify using this module from elsewhere. Author: Tomáš Vondra Review: Peter Geoghegan --- src/backend/lib/hyperloglog.c | 48 ++++++++++++++++++++++++++++++++++- src/include/lib/hyperloglog.h | 2 ++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/backend/lib/hyperloglog.c b/src/backend/lib/hyperloglog.c index 094bc09a44..fa7f05a241 100644 --- a/src/backend/lib/hyperloglog.c +++ b/src/backend/lib/hyperloglog.c @@ -56,7 +56,7 @@ static inline uint8 rho(uint32 x, uint8 b); /* - * Initialize HyperLogLog track state + * Initialize HyperLogLog track state, by bit width * * bwidth is bit width (so register size will be 2 to the power of bwidth). * Must be between 4 and 16 inclusive. @@ -107,6 +107,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth) cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters; } +/* + * Initialize HyperLogLog track state, by error rate + * + * Instead of specifying bwidth (number of bits used for addressing the + * register), this method allows sizing the counter for particular error + * rate using a simple formula from the paper: + * + * e = 1.04 / sqrt(m) + * + * where 'm' is the number of registers, i.e. (2^bwidth). The method + * finds the lowest bwidth with 'e' below the requested error rate, and + * then uses it to initialize the counter. + * + * As bwidth has to be between 4 and 16, the worst possible error rate + * is between ~25% (bwidth=4) and 0.4% (bwidth=16). + */ +void +initHyperLogLogError(hyperLogLogState *cState, double error) +{ + uint8 bwidth = 4; + + while (bwidth < 16) + { + double m = (Size) 1 << bwidth; + + if (1.04 / sqrt(m) < error) + break; + bwidth++; + } + + initHyperLogLog(cState, bwidth); +} + +/* + * Free HyperLogLog track state + * + * Releases allocated resources, but not the state itself (in case it's not + * allocated by palloc). + */ +void +freeHyperLogLog(hyperLogLogState *cState) +{ + Assert(cState->hashesArr != NULL); + pfree(cState->hashesArr); +} + /* * Adds element to the estimator, from caller-supplied hash. * diff --git a/src/include/lib/hyperloglog.h b/src/include/lib/hyperloglog.h index 5a1d4d356a..b999b3056a 100644 --- a/src/include/lib/hyperloglog.h +++ b/src/include/lib/hyperloglog.h @@ -60,8 +60,10 @@ typedef struct hyperLogLogState } hyperLogLogState; extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth); +extern void initHyperLogLogError(hyperLogLogState *cState, double error); extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash); extern double estimateHyperLogLog(hyperLogLogState *cState); extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState); +extern void freeHyperLogLog(hyperLogLogState *cState); #endif /* HYPERLOGLOG_H */ -- 2.40.0