aboutsummaryrefslogtreecommitdiff
path: root/src/lib/third_party
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2020-08-11 16:23:35 +0200
committerLuca Deri <deri@ntop.org>2020-08-11 16:23:35 +0200
commit0e363d0ca6ab4f1df16159e3d3b4bebba9372772 (patch)
tree538598c9b70d253e7b9a0ed092ff99fba4e40370 /src/lib/third_party
parent8c2c388d54032f25824c1a0cce4dd379a87bed17 (diff)
Added HLL notes
Diffstat (limited to 'src/lib/third_party')
-rw-r--r--src/lib/third_party/src/hll/hll.c52
1 files changed, 27 insertions, 25 deletions
diff --git a/src/lib/third_party/src/hll/hll.c b/src/lib/third_party/src/hll/hll.c
index a7006c7ed..c526c6af0 100644
--- a/src/lib/third_party/src/hll/hll.c
+++ b/src/lib/third_party/src/hll/hll.c
@@ -34,6 +34,7 @@ u_int32_t _hll_hash(const struct ndpi_hll *hll) {
return MurmurHash3_x86_32(hll->registers, (u_int32_t)hll->size, 0);
}
+/* Count the number of leading zero's */
static __inline u_int8_t _hll_rank(u_int32_t hash, u_int8_t bits) {
u_int8_t i;
@@ -48,24 +49,26 @@ static __inline u_int8_t _hll_rank(u_int32_t hash, u_int8_t bits) {
}
/*
- IMPORTANT: memory usage notes
+ IMPORTANT: HyperLogLog Memory and StandardError Notes
- [i: 4] 16 bytes
- [i: 5] 32 bytes
- [i: 6] 64 bytes
- [i: 7] 128 bytes
- [i: 8] 256 bytes
- [i: 9] 512 bytes
- [i: 10] 1024 bytes
- [i: 11] 2048 bytes
- [i: 12] 4096 bytes
- [i: 13] 8192 bytes
- [i: 14] 16384 bytes
- [i: 15] 32768 bytes
- [i: 16] 65536 bytes
- [i: 17] 131072 bytes
- [i: 18] 262144 bytes
- [i: 19] 524288 bytes
+ StdError = 1.04/sqrt(2^i)
+
+ [i: 4] 16 bytes [StdError: 26% ]
+ [i: 5] 32 bytes [StdError: 18.4%]
+ [i: 6] 64 bytes [StdError: 13% ]
+ [i: 7] 128 bytes [StdError: 9.2% ]
+ [i: 8] 256 bytes [StdError: 6.5% ]
+ [i: 9] 512 bytes [StdError: 4.6% ]
+ [i: 10] 1024 bytes [StdError: 3.25%]
+ [i: 11] 2048 bytes [StdError: 2.3% ]
+ [i: 12] 4096 bytes [StdError: 1.6% ]
+ [i: 13] 8192 bytes [StdError: 1.15%]
+ [i: 14] 16384 bytes [StdError: 0.81%]
+ [i: 15] 32768 bytes [StdError: 0.57%]
+ [i: 16] 65536 bytes [StdError: 0.41%]
+ [i: 17] 131072 bytes [StdError: 0.29%]
+ [i: 18] 262144 bytes [StdError: 0.2% ]
+ [i: 19] 524288 bytes [StdError: 0.14%]
*/
int hll_init(struct ndpi_hll *hll, u_int8_t bits) {
if(bits < 4 || bits > 20) {
@@ -73,9 +76,9 @@ int hll_init(struct ndpi_hll *hll, u_int8_t bits) {
return -1;
}
- hll->bits = bits;
- hll->size = (size_t)1 << bits;
- hll->registers = ndpi_calloc(hll->size, 1);
+ hll->bits = bits; /* Number of bits of buckets number */
+ hll->size = (size_t)1 << bits; /* Number of buckets 2^bits */
+ hll->registers = ndpi_calloc(hll->size, 1); /* Create the bucket register counters */
/* printf("%lu bytes\n", hll->size); */
return 0;
@@ -96,12 +99,11 @@ void hll_reset(struct ndpi_hll *hll) {
static __inline void _hll_add_hash(struct ndpi_hll *hll, u_int32_t hash) {
if(hll->registers) {
- u_int32_t index = hash >> (32 - hll->bits);
- u_int8_t rank = _hll_rank(hash, hll->bits);
+ u_int32_t index = hash >> (32 - hll->bits); /* Use the first 'hll->bits' bits as bucket index */
+ u_int8_t rank = _hll_rank(hash, hll->bits); /* Count the number of leading 0 */
- if(rank > hll->registers[index]) {
- hll->registers[index] = rank;
- }
+ if(rank > hll->registers[index])
+ hll->registers[index] = rank; /* Store the largest number of lesding zeros for the bucket */
}
}