Commit 948c9795 authored by Alvaro Herrera

Add two HyperLogLog functions

New functions initHyperLogLogError() and freeHyperLogLog() simplify
using this module from elsewhere.

Author: Tomáš Vondra
Review: Peter Geoghegan
parent 9ff60273
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
static inline uint8 rho(uint32 x, uint8 b); static inline uint8 rho(uint32 x, uint8 b);
/* /*
* Initialize HyperLogLog track state * Initialize HyperLogLog track state, by bit width
* *
* bwidth is bit width (so register size will be 2 to the power of bwidth). * bwidth is bit width (so register size will be 2 to the power of bwidth).
* Must be between 4 and 16 inclusive. * Must be between 4 and 16 inclusive.
...@@ -107,6 +107,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth) ...@@ -107,6 +107,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters; cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters;
} }
/*
 * Initialize HyperLogLog track state, by error rate
 *
 * Rather than taking bwidth (the number of bits used to address the
 * registers) directly, this variant sizes the counter for a requested
 * error rate, using the estimate given in the paper:
 *
 *	  e = 1.04 / sqrt(m)
 *
 * where 'm' is the number of registers, i.e. (2^bwidth).  The smallest
 * bwidth whose 'e' is strictly below the requested error is chosen and
 * passed on to initHyperLogLog().
 *
 * bwidth is searched in the range 4..16, so the achievable error rate
 * ranges from ~26% (bwidth=4) down to ~0.4% (bwidth=16).  If the request
 * is tighter than bwidth=15 can deliver, bwidth=16 is used regardless.
 */
void
initHyperLogLogError(hyperLogLogState *cState, double error)
{
	uint8		nbits;

	/* Try widths in increasing order; stop at the first accurate enough. */
	for (nbits = 4; nbits < 16; nbits++)
	{
		double		nregs = (Size) 1 << nbits;
		double		expected = 1.04 / sqrt(nregs);

		if (expected < error)
			break;
	}

	/* nbits is 16 here if no narrower width satisfied the request. */
	initHyperLogLog(cState, nbits);
}
/*
 * Free HyperLogLog track state
 *
 * Releases the hashesArr buffer held by the state, but not the state
 * struct itself (in case it's not allocated by palloc).
 *
 * The hashesArr pointer is reset to NULL after being freed, so that a
 * repeated call on the same state trips the assertion below instead of
 * handing an already-freed pointer to pfree().
 */
void
freeHyperLogLog(hyperLogLogState *cState)
{
	Assert(cState->hashesArr != NULL);
	pfree(cState->hashesArr);
	/* Don't leave a dangling pointer behind in the caller-owned struct. */
	cState->hashesArr = NULL;
}
/* /*
* Adds element to the estimator, from caller-supplied hash. * Adds element to the estimator, from caller-supplied hash.
* *
......
...@@ -60,8 +60,10 @@ typedef struct hyperLogLogState ...@@ -60,8 +60,10 @@ typedef struct hyperLogLogState
} hyperLogLogState; } hyperLogLogState;
extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth); extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
extern void initHyperLogLogError(hyperLogLogState *cState, double error);
extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash); extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash);
extern double estimateHyperLogLog(hyperLogLogState *cState); extern double estimateHyperLogLog(hyperLogLogState *cState);
extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState); extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState);
extern void freeHyperLogLog(hyperLogLogState *cState);
#endif /* HYPERLOGLOG_H */ #endif /* HYPERLOGLOG_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment