aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2023-08-14 12:50:54 +0200
committerLuca Deri <deri@ntop.org>2023-08-14 12:50:54 +0200
commit049fbf819caca45c69d8f65ce52e8d6899e44184 (patch)
tree8f539329d835d1e2618722ba482300aec1d8f967
parent3b34941be1d8470938781b391da484aff984c6dd (diff)
Reworked ndpi_filter_xxx implementation using compressed bitmaps
-rw-r--r--example/ndpiReader.c2
-rw-r--r--src/include/ndpi_api.h15
-rw-r--r--src/lib/ndpi_analyze.c1
-rw-r--r--src/lib/ndpi_filter.c75
-rw-r--r--src/lib/third_party/include/MurmurHash3.h2
-rw-r--r--src/lib/third_party/include/binaryfusefilter.h725
-rw-r--r--src/lib/third_party/src/hll/MurmurHash3.c4
-rw-r--r--src/lib/third_party/src/hll/hll.c2
8 files changed, 53 insertions, 773 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 19da0edb0..11f6fc496 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -5324,7 +5324,7 @@ void compressedBitmapUnitTest() {
/* *********************************************** */
void filterUnitTest() {
- ndpi_filter* f = ndpi_filter_alloc(10000);
+ ndpi_filter* f = ndpi_filter_alloc();
u_int32_t v, i;
assert(f);
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 0e519cbf5..0a2322ffe 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -1989,16 +1989,17 @@ extern "C" {
/* ******************************* */
/*
- Bloom-filter on steroids
-
- Based on https://github.com/FastFilter/xor_singleheader
+ Bloom-filter on steroids based on ndpi_bitmap
*/
- ndpi_filter* ndpi_filter_alloc(uint32_t elements_number);
- bool ndpi_filter_add(ndpi_filter *f, uint64_t value); /* returns true on success, false on failure */
- bool ndpi_filter_add_multi(ndpi_filter *f, uint64_t *values, u_int32_t num_values); /* Add multiple values */
- bool ndpi_filter_contains(ndpi_filter *f, uint64_t value); /* returns true on success, false on failure */
+ ndpi_filter* ndpi_filter_alloc();
+ bool ndpi_filter_add(ndpi_filter *f, u_int32_t value); /* returns true on success, false on failure */
+ bool ndpi_filter_add_string(ndpi_filter *f, char *string); /* returns true on success, false on failure */
+ bool ndpi_filter_contains(ndpi_filter *f, u_int32_t value); /* returns true on success, false on failure */
+ bool ndpi_filter_contains_string(ndpi_filter *f, char *string); /* returns true on success, false on failure */
void ndpi_filter_free(ndpi_filter *f);
+ size_t ndpi_filter_size(ndpi_filter *f);
+ u_int32_t ndpi_filter_cardinality(ndpi_filter *f);
/* ******************************* */
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index 17f755026..ef3d3cb9a 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -299,7 +299,6 @@ const char* ndpi_data_ratio2str(float ratio) {
/* ********************************************************************************* */
#include "third_party/src/hll/hll.c"
-#include "third_party/src/hll/MurmurHash3.c"
int ndpi_hll_init(struct ndpi_hll *hll, u_int8_t bits) {
return(hll_init(hll, bits));
diff --git a/src/lib/ndpi_filter.c b/src/lib/ndpi_filter.c
index 2c06773fb..1abf9a286 100644
--- a/src/lib/ndpi_filter.c
+++ b/src/lib/ndpi_filter.c
@@ -27,7 +27,6 @@
#include <math.h>
#include <sys/types.h>
-
#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN
#include "ndpi_config.h"
@@ -35,71 +34,77 @@
#include "ndpi_includes.h"
#include "ndpi_encryption.h"
-#define malloc ndpi_malloc
-#define calloc ndpi_calloc
-#define realloc ndpi_realloc
-#define free ndpi_free
-
-#include "third_party/include/binaryfusefilter.h"
+#include "third_party/src/hll/MurmurHash3.c"
/* ******************************************* */
-ndpi_filter* ndpi_filter_alloc(uint32_t elements_number) {
- binary_fuse8_t *filter = (binary_fuse8_t*)ndpi_malloc(sizeof(binary_fuse8_t));
-
- if(filter == NULL) return(NULL);
-
- if(!binary_fuse8_allocate(elements_number, filter)) {
- ndpi_free(filter);
- return(NULL);
- } else
- return((ndpi_filter*)filter);
+ndpi_filter* ndpi_filter_alloc() {
+ return((ndpi_filter*)ndpi_bitmap_alloc());
}
/* ******************************************* */
-bool ndpi_filter_add(ndpi_filter *f, uint64_t value) {
+bool ndpi_filter_add(ndpi_filter *f, u_int32_t value) {
if(!f)
return(false);
else {
- binary_fuse8_t *filter = (binary_fuse8_t*)f;
-
- return(binary_fuse8_populate(&value, 1, filter));
+ ndpi_bitmap *filter = (ndpi_bitmap*)f;
+
+ ndpi_bitmap_set(filter, value);
+ return(true);
}
}
/* ******************************************* */
-bool ndpi_filter_add_multi(ndpi_filter *f, uint64_t *values, u_int32_t num_values) {
- if(!f)
- return(false);
- else {
- binary_fuse8_t *filter = (binary_fuse8_t*)f;
-
- return(binary_fuse8_populate(values, num_values, filter));
- }
+bool ndpi_filter_add_string(ndpi_filter *f, char *string) {
+ return(ndpi_filter_add(f, MurmurHash(string, strlen(string), 0xD6DFE7)));
}
/* ******************************************* */
-bool ndpi_filter_contains(ndpi_filter *f, uint64_t value) {
+bool ndpi_filter_contains(ndpi_filter *f, u_int32_t value) {
if(!f)
return(false);
else {
- binary_fuse8_t *filter = (binary_fuse8_t*)f;
+ ndpi_bitmap *filter = (ndpi_bitmap*)f;
- return(binary_fuse8_contain(value, filter));
+ return(ndpi_bitmap_isset(filter, value));
}
}
/* ******************************************* */
+bool ndpi_filter_contains_string(ndpi_filter *f, char *string) {
+ return(ndpi_filter_contains(f, MurmurHash(string, strlen(string), 0xD6DFE7)));
+}
+
+/* ******************************************* */
+
void ndpi_filter_free(ndpi_filter *f) {
if(f != NULL) {
- binary_fuse8_t *filter = (binary_fuse8_t*)f;
+ ndpi_bitmap *filter = (ndpi_bitmap*)f;
- binary_fuse8_free(filter);
- ndpi_free(filter);
+ ndpi_bitmap_free(filter);
}
}
+ /* ******************************************* */
+
+size_t ndpi_filter_size(ndpi_filter *f) {
+ if(f != NULL) {
+ char *buf;
+ size_t s = ndpi_bitmap_serialize(f, &buf);
+
+ if(buf) free(buf);
+ return(s);
+ } else
+ return(0);
+}
+
+ /* ******************************************* */
+
+u_int32_t ndpi_filter_cardinality(ndpi_filter *f) {
+ return(f ? ndpi_bitmap_cardinality(f) : 0);
+}
+
diff --git a/src/lib/third_party/include/MurmurHash3.h b/src/lib/third_party/include/MurmurHash3.h
index a048eb37d..43dc5fd92 100644
--- a/src/lib/third_party/include/MurmurHash3.h
+++ b/src/lib/third_party/include/MurmurHash3.h
@@ -3,6 +3,6 @@
#include <stdint.h>
-uint32_t MurmurHash3_x86_32(const void * key, uint32_t len, uint32_t seed);
+uint32_t MurmurHash(const void * key, uint32_t len, uint32_t seed);
#endif
diff --git a/src/lib/third_party/include/binaryfusefilter.h b/src/lib/third_party/include/binaryfusefilter.h
deleted file mode 100644
index 3b7e5022f..000000000
--- a/src/lib/third_party/include/binaryfusefilter.h
+++ /dev/null
@@ -1,725 +0,0 @@
-#ifndef BINARYFUSEFILTER_H
-#define BINARYFUSEFILTER_H
-#include <math.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef XOR_MAX_ITERATIONS
-#define XOR_MAX_ITERATIONS \
- 100 // probability of success should always be > 0.5 so 100 iterations is
- // highly unlikely
-#endif
-
-/**
- * We start with a few utilities.
- ***/
-static inline uint64_t binary_fuse_murmur64(uint64_t h) {
- h ^= h >> 33;
- h *= UINT64_C(0xff51afd7ed558ccd);
- h ^= h >> 33;
- h *= UINT64_C(0xc4ceb9fe1a85ec53);
- h ^= h >> 33;
- return h;
-}
-static inline uint64_t binary_fuse_mix_split(uint64_t key, uint64_t seed) {
- return binary_fuse_murmur64(key + seed);
-}
-static inline uint64_t binary_fuse_rotl64(uint64_t n, unsigned int c) {
- return (n << (c & 63)) | (n >> ((-c) & 63));
-}
-static inline uint32_t binary_fuse_reduce(uint32_t hash, uint32_t n) {
- // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
- return (uint32_t)(((uint64_t)hash * n) >> 32);
-}
-static inline uint64_t binary_fuse8_fingerprint(uint64_t hash) {
- return hash ^ (hash >> 32);
-}
-
-/**
- * We need a decent random number generator.
- **/
-
-// returns random number, modifies the seed
-static inline uint64_t binary_fuse_rng_splitmix64(uint64_t *seed) {
- uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15));
- z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9);
- z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB);
- return z ^ (z >> 31);
-}
-
-typedef struct binary_fuse8_s {
- uint64_t Seed;
- uint32_t SegmentLength;
- uint32_t SegmentLengthMask;
- uint32_t SegmentCount;
- uint32_t SegmentCountLength;
- uint32_t ArrayLength;
- uint8_t *Fingerprints;
-} binary_fuse8_t;
-
-// #ifdefs adapted from:
-// https://stackoverflow.com/a/50958815
-#ifdef __SIZEOF_INT128__ // compilers supporting __uint128, e.g., gcc, clang
-static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
- return ((__uint128_t)a * b) >> 64;
-}
-#elif defined(_M_X64) || defined(_MARM64) // MSVC
-static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
- return __umulh(a, b);
-}
-#elif defined(_M_IA64) // also MSVC
-static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
- unsigned __int64 hi;
- (void) _umul128(a, b, &hi);
- return hi;
-}
-#else // portable implementation using uint64_t
-static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
- // Adapted from:
- // https://stackoverflow.com/a/51587262
-
- /*
- This is implementing schoolbook multiplication:
-
- a1 a0
- X b1 b0
- -------------
- 00 LOW PART
- -------------
- 00
- 10 10 MIDDLE PART
- + 01
- -------------
- 01
- + 11 11 HIGH PART
- -------------
- */
-
- const uint64_t a0 = (uint32_t) a;
- const uint64_t a1 = a >> 32;
- const uint64_t b0 = (uint32_t) b;
- const uint64_t b1 = b >> 32;
- const uint64_t p11 = a1 * b1;
- const uint64_t p01 = a0 * b1;
- const uint64_t p10 = a1 * b0;
- const uint64_t p00 = a0 * b0;
-
- // 64-bit product + two 32-bit values
- const uint64_t middle = p10 + (p00 >> 32) + (uint32_t) p01;
-
- /*
- Proof that 64-bit products can accumulate two more 32-bit values
- without overflowing:
-
- Max 32-bit value is 2^32 - 1.
- PSum = (2^32-1) * (2^32-1) + (2^32-1) + (2^32-1)
- = 2^64 - 2^32 - 2^32 + 1 + 2^32 - 1 + 2^32 - 1
- = 2^64 - 1
- Therefore the high half below cannot overflow regardless of input.
- */
-
- // high half
- return p11 + (middle >> 32) + (p01 >> 32);
-
- // low half (which we don't care about, but here it is)
- // (middle << 32) | (uint32_t) p00;
-}
-#endif
-
-typedef struct binary_hashes_s {
- uint32_t h0;
- uint32_t h1;
- uint32_t h2;
-} binary_hashes_t;
-
-static inline binary_hashes_t binary_fuse8_hash_batch(uint64_t hash,
- const binary_fuse8_t *filter) {
- uint64_t hi = binary_fuse_mulhi(hash, filter->SegmentCountLength);
- binary_hashes_t ans;
- ans.h0 = (uint32_t)hi;
- ans.h1 = ans.h0 + filter->SegmentLength;
- ans.h2 = ans.h1 + filter->SegmentLength;
- ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask;
- ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask;
- return ans;
-}
-
-static inline uint32_t binary_fuse8_hash(int index, uint64_t hash,
- const binary_fuse8_t *filter) {
- uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength);
- h += index * filter->SegmentLength;
- // keep the lower 36 bits
- uint64_t hh = hash & ((((uint64_t)1UL) << 36) - 1);
- // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift
- h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask);
- return h;
-}
-
-// Report if the key is in the set, with false positive rate.
-static inline bool binary_fuse8_contain(uint64_t key,
- const binary_fuse8_t *filter) {
- uint64_t hash = binary_fuse_mix_split(key, filter->Seed);
- uint8_t f = binary_fuse8_fingerprint(hash);
- binary_hashes_t hashes = binary_fuse8_hash_batch(hash, filter);
- f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^
- filter->Fingerprints[hashes.h2];
- return f == 0;
-}
-
-static inline uint32_t binary_fuse_calculate_segment_length(uint32_t arity,
- uint32_t size) {
- // These parameters are very sensitive. Replacing 'floor' by 'round' can
- // substantially affect the construction time.
- if (arity == 3) {
- return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(3.33) + 2.25));
- } else if (arity == 4) {
- return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(2.91) - 0.5));
- } else {
- return 65536;
- }
-}
-
-static inline double binary_fuse_max(double a, double b) {
- if (a < b) {
- return b;
- }
- return a;
-}
-
-static inline double binary_fuse_calculate_size_factor(uint32_t arity,
- uint32_t size) {
- if (arity == 3) {
- return binary_fuse_max(1.125, 0.875 + 0.25 * log(1000000.0) / log((double)size));
- } else if (arity == 4) {
- return binary_fuse_max(1.075, 0.77 + 0.305 * log(600000.0) / log((double)size));
- } else {
- return 2.0;
- }
-}
-
-// allocate enough capacity for a set containing up to 'size' elements
-// caller is responsible to call binary_fuse8_free(filter)
-// size should be at least 2.
-static inline bool binary_fuse8_allocate(uint32_t size,
- binary_fuse8_t *filter) {
- uint32_t arity = 3;
- filter->SegmentLength = size == 0 ? 4 : binary_fuse_calculate_segment_length(arity, size);
- if (filter->SegmentLength > 262144) {
- filter->SegmentLength = 262144;
- }
- filter->SegmentLengthMask = filter->SegmentLength - 1;
- double sizeFactor = binary_fuse_calculate_size_factor(arity, size);
- uint32_t capacity = size <= 1 ? 0 : (uint32_t)(round((double)size * sizeFactor));
- uint32_t initSegmentCount =
- (capacity + filter->SegmentLength - 1) / filter->SegmentLength -
- (arity - 1);
- filter->ArrayLength = (initSegmentCount + arity - 1) * filter->SegmentLength;
- filter->SegmentCount =
- (filter->ArrayLength + filter->SegmentLength - 1) / filter->SegmentLength;
- if (filter->SegmentCount <= arity - 1) {
- filter->SegmentCount = 1;
- } else {
- filter->SegmentCount = filter->SegmentCount - (arity - 1);
- }
- filter->ArrayLength =
- (filter->SegmentCount + arity - 1) * filter->SegmentLength;
- filter->SegmentCountLength = filter->SegmentCount * filter->SegmentLength;
- filter->Fingerprints = (uint8_t*)calloc(1,filter->ArrayLength);
- return filter->Fingerprints != NULL;
-}
-
-// report memory usage
-static inline size_t binary_fuse8_size_in_bytes(const binary_fuse8_t *filter) {
- return filter->ArrayLength * sizeof(uint8_t) + sizeof(binary_fuse8_t);
-}
-
-// release memory
-static inline void binary_fuse8_free(binary_fuse8_t *filter) {
- free(filter->Fingerprints);
- filter->Fingerprints = NULL;
- filter->Seed = 0;
- filter->SegmentLength = 0;
- filter->SegmentLengthMask = 0;
- filter->SegmentCount = 0;
- filter->SegmentCountLength = 0;
- filter->ArrayLength = 0;
-}
-
-static inline uint8_t binary_fuse_mod3(uint8_t x) {
- return x > 2 ? x - 3 : x;
-}
-
-// Construct the filter, returns true on success, false on failure.
-// The algorithm fails when there is insufficient memory.
-// The caller is responsable for calling binary_fuse8_allocate(size,filter)
-// before. For best performance, the caller should ensure that there are not too
-// many duplicated keys.
-static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size,
- binary_fuse8_t *filter) {
- uint64_t rng_counter = 0x726b2b9d438b9d4d;
- filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
- uint64_t *reverseOrder = (uint64_t *)calloc((size + 1), sizeof(uint64_t));
- uint32_t capacity = filter->ArrayLength;
- uint32_t *alone = (uint32_t *)malloc(capacity * sizeof(uint32_t));
- uint8_t *t2count = (uint8_t *)calloc(capacity, sizeof(uint8_t));
- uint8_t *reverseH = (uint8_t *)malloc(size * sizeof(uint8_t));
- uint64_t *t2hash = (uint64_t *)calloc(capacity, sizeof(uint64_t));
-
- uint32_t blockBits = 1;
- while (((uint32_t)1 << blockBits) < filter->SegmentCount) {
- blockBits += 1;
- }
- uint32_t block = ((uint32_t)1 << blockBits);
- uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t));
- uint32_t h012[5];
-
- if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) ||
- (t2hash == NULL) || (reverseOrder == NULL) || (startPos == NULL)) {
- free(alone);
- free(t2count);
- free(reverseH);
- free(t2hash);
- free(reverseOrder);
- free(startPos);
- return false;
- }
- reverseOrder[size] = 1;
- for (int loop = 0; true; ++loop) {
- if (loop + 1 > XOR_MAX_ITERATIONS) {
- // The probability of this happening is lower than the
- // the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
- // but if it happens, we just fill the fingerprint with ones which
- // will flag all possible keys as 'possible', ensuring a correct result.
- memset(filter->Fingerprints, ~0, filter->ArrayLength);
- free(alone);
- free(t2count);
- free(reverseH);
- free(t2hash);
- free(reverseOrder);
- free(startPos);
- return true;
- }
-
- for (uint32_t i = 0; i < block; i++) {
- // important : i * size would overflow as a 32-bit number in some
- // cases.
- startPos[i] = ((uint64_t)i * size) >> blockBits;
- }
-
- uint64_t maskblock = block - 1;
- for (uint32_t i = 0; i < size; i++) {
- uint64_t hash = binary_fuse_murmur64(keys[i] + filter->Seed);
- uint64_t segment_index = hash >> (64 - blockBits);
- while (reverseOrder[startPos[segment_index]] != 0) {
- segment_index++;
- segment_index &= maskblock;
- }
- reverseOrder[startPos[segment_index]] = hash;
- startPos[segment_index]++;
- }
- int error = 0;
- uint32_t duplicates = 0;
- for (uint32_t i = 0; i < size; i++) {
- uint64_t hash = reverseOrder[i];
- uint32_t h0 = binary_fuse8_hash(0, hash, filter);
- t2count[h0] += 4;
- t2hash[h0] ^= hash;
- uint32_t h1= binary_fuse8_hash(1, hash, filter);
- t2count[h1] += 4;
- t2count[h1] ^= 1;
- t2hash[h1] ^= hash;
- uint32_t h2 = binary_fuse8_hash(2, hash, filter);
- t2count[h2] += 4;
- t2hash[h2] ^= hash;
- t2count[h2] ^= 2;
- if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) {
- if (((t2hash[h0] == 0) && (t2count[h0] == 8))
- || ((t2hash[h1] == 0) && (t2count[h1] == 8))
- || ((t2hash[h2] == 0) && (t2count[h2] == 8))) {
- duplicates += 1;
- t2count[h0] -= 4;
- t2hash[h0] ^= hash;
- t2count[h1] -= 4;
- t2count[h1] ^= 1;
- t2hash[h1] ^= hash;
- t2count[h2] -= 4;
- t2count[h2] ^= 2;
- t2hash[h2] ^= hash;
- }
- }
- error = (t2count[h0] < 4) ? 1 : error;
- error = (t2count[h1] < 4) ? 1 : error;
- error = (t2count[h2] < 4) ? 1 : error;
- }
- if(error) {
- memset(reverseOrder, 0, sizeof(uint64_t) * size);
- memset(t2count, 0, sizeof(uint8_t) * capacity);
- memset(t2hash, 0, sizeof(uint64_t) * capacity);
- filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
- continue;
- }
-
- // End of key addition
- uint32_t Qsize = 0;
- // Add sets with one key to the queue.
- for (uint32_t i = 0; i < capacity; i++) {
- alone[Qsize] = i;
- Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0;
- }
- uint32_t stacksize = 0;
- while (Qsize > 0) {
- Qsize--;
- uint32_t index = alone[Qsize];
- if ((t2count[index] >> 2) == 1) {
- uint64_t hash = t2hash[index];
-
- //h012[0] = binary_fuse8_hash(0, hash, filter);
- h012[1] = binary_fuse8_hash(1, hash, filter);
- h012[2] = binary_fuse8_hash(2, hash, filter);
- h012[3] = binary_fuse8_hash(0, hash, filter); // == h012[0];
- h012[4] = h012[1];
- uint8_t found = t2count[index] & 3;
- reverseH[stacksize] = found;
- reverseOrder[stacksize] = hash;
- stacksize++;
- uint32_t other_index1 = h012[found + 1];
- alone[Qsize] = other_index1;
- Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0);
-
- t2count[other_index1] -= 4;
- t2count[other_index1] ^= binary_fuse_mod3(found + 1);
- t2hash[other_index1] ^= hash;
-
- uint32_t other_index2 = h012[found + 2];
- alone[Qsize] = other_index2;
- Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0);
- t2count[other_index2] -= 4;
- t2count[other_index2] ^= binary_fuse_mod3(found + 2);
- t2hash[other_index2] ^= hash;
- }
- }
- if (stacksize + duplicates == size) {
- // success
- size = stacksize;
- break;
- }
- memset(reverseOrder, 0, sizeof(uint64_t) * size);
- memset(t2count, 0, sizeof(uint8_t) * capacity);
- memset(t2hash, 0, sizeof(uint64_t) * capacity);
- filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
- }
-
- for (uint32_t i = size - 1; i < size; i--) {
- // the hash of the key we insert next
- uint64_t hash = reverseOrder[i];
- uint8_t xor2 = binary_fuse8_fingerprint(hash);
- uint8_t found = reverseH[i];
- h012[0] = binary_fuse8_hash(0, hash, filter);
- h012[1] = binary_fuse8_hash(1, hash, filter);
- h012[2] = binary_fuse8_hash(2, hash, filter);
- h012[3] = h012[0];
- h012[4] = h012[1];
- filter->Fingerprints[h012[found]] = xor2 ^
- filter->Fingerprints[h012[found + 1]] ^
- filter->Fingerprints[h012[found + 2]];
- }
- free(alone);
- free(t2count);
- free(reverseH);
- free(t2hash);
- free(reverseOrder);
- free(startPos);
- return true;
-}
-
-//////////////////
-// fuse16
-//////////////////
-
-typedef struct binary_fuse16_s {
- uint64_t Seed;
- uint32_t SegmentLength;
- uint32_t SegmentLengthMask;
- uint32_t SegmentCount;
- uint32_t SegmentCountLength;
- uint32_t ArrayLength;
- uint16_t *Fingerprints;
-} binary_fuse16_t;
-
-static inline uint64_t binary_fuse16_fingerprint(uint64_t hash) {
- return hash ^ (hash >> 32);
-}
-
-static inline binary_hashes_t binary_fuse16_hash_batch(uint64_t hash,
- const binary_fuse16_t *filter) {
- uint64_t hi = binary_fuse_mulhi(hash, filter->SegmentCountLength);
- binary_hashes_t ans;
- ans.h0 = (uint32_t)hi;
- ans.h1 = ans.h0 + filter->SegmentLength;
- ans.h2 = ans.h1 + filter->SegmentLength;
- ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask;
- ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask;
- return ans;
-}
-static inline uint32_t binary_fuse16_hash(int index, uint64_t hash,
- const binary_fuse16_t *filter) {
- uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength);
- h += index * filter->SegmentLength;
- // keep the lower 36 bits
- uint64_t hh = hash & ((((uint64_t)1UL) << 36) - 1);
- // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift
- h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask);
- return h;
-}
-
-// Report if the key is in the set, with false positive rate.
-static inline bool binary_fuse16_contain(uint64_t key,
- const binary_fuse16_t *filter) {
- uint64_t hash = binary_fuse_mix_split(key, filter->Seed);
- uint16_t f = binary_fuse16_fingerprint(hash);
- binary_hashes_t hashes = binary_fuse16_hash_batch(hash, filter);
- f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^
- filter->Fingerprints[hashes.h2];
- return f == 0;
-}
-
-
-// allocate enough capacity for a set containing up to 'size' elements
-// caller is responsible to call binary_fuse16_free(filter)
-// size should be at least 2.
-static inline bool binary_fuse16_allocate(uint32_t size,
- binary_fuse16_t *filter) {
- uint32_t arity = 3;
- filter->SegmentLength = size == 0 ? 4 : binary_fuse_calculate_segment_length(arity, size);
- if (filter->SegmentLength > 262144) {
- filter->SegmentLength = 262144;
- }
- filter->SegmentLengthMask = filter->SegmentLength - 1;
- double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size);
- uint32_t capacity = (uint32_t)(round((double)size * sizeFactor));
- uint32_t initSegmentCount =
- (capacity + filter->SegmentLength - 1) / filter->SegmentLength -
- (arity - 1);
- filter->ArrayLength = (initSegmentCount + arity - 1) * filter->SegmentLength;
- filter->SegmentCount =
- (filter->ArrayLength + filter->SegmentLength - 1) / filter->SegmentLength;
- if (filter->SegmentCount <= arity - 1) {
- filter->SegmentCount = 1;
- } else {
- filter->SegmentCount = filter->SegmentCount - (arity - 1);
- }
- filter->ArrayLength =
- (filter->SegmentCount + arity - 1) * filter->SegmentLength;
- filter->SegmentCountLength = filter->SegmentCount * filter->SegmentLength;
- filter->Fingerprints = (uint16_t*)malloc(filter->ArrayLength * sizeof(uint16_t));
- return filter->Fingerprints != NULL;
-}
-
-// report memory usage
-static inline size_t binary_fuse16_size_in_bytes(const binary_fuse16_t *filter) {
- return filter->ArrayLength * sizeof(uint16_t) + sizeof(binary_fuse16_t);
-}
-
-// release memory
-static inline void binary_fuse16_free(binary_fuse16_t *filter) {
- free(filter->Fingerprints);
- filter->Fingerprints = NULL;
- filter->Seed = 0;
- filter->SegmentLength = 0;
- filter->SegmentLengthMask = 0;
- filter->SegmentCount = 0;
- filter->SegmentCountLength = 0;
- filter->ArrayLength = 0;
-}
-
-
-// Construct the filter, returns true on success, false on failure.
-// The algorithm fails when there is insufficient memory.
-// The caller is responsable for calling binary_fuse8_allocate(size,filter)
-// before. For best performance, the caller should ensure that there are not too
-// many duplicated keys.
-static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size,
- binary_fuse16_t *filter) {
- uint64_t rng_counter = 0x726b2b9d438b9d4d;
- filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
- uint64_t *reverseOrder = (uint64_t *)calloc((size + 1), sizeof(uint64_t));
- uint32_t capacity = filter->ArrayLength;
- uint32_t *alone = (uint32_t *)malloc(capacity * sizeof(uint32_t));
- uint8_t *t2count = (uint8_t *)calloc(capacity, sizeof(uint8_t));
- uint8_t *reverseH = (uint8_t *)malloc(size * sizeof(uint8_t));
- uint64_t *t2hash = (uint64_t *)calloc(capacity, sizeof(uint64_t));
-
- uint32_t blockBits = 1;
- while (((uint32_t)1 << blockBits) < filter->SegmentCount) {
- blockBits += 1;
- }
- uint32_t block = ((uint32_t)1 << blockBits);
- uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t));
- uint32_t h012[5];
-
- if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) ||
- (t2hash == NULL) || (reverseOrder == NULL) || (startPos == NULL)) {
- free(alone);
- free(t2count);
- free(reverseH);
- free(t2hash);
- free(reverseOrder);
- free(startPos);
- return false;
- }
- reverseOrder[size] = 1;
- for (int loop = 0; true; ++loop) {
- if (loop + 1 > XOR_MAX_ITERATIONS) {
- // The probability of this happening is lower than the
- // the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
- // but if it happens, we just fill the fingerprint with ones which
- // will flag all possible keys as 'possible', ensuring a correct result.
- memset(filter->Fingerprints, ~0, filter->ArrayLength * sizeof(uint16_t));
- free(alone);
- free(t2count);
- free(reverseH);
- free(t2hash);
- free(reverseOrder);
- free(startPos);
- return true;
- }
-
- for (uint32_t i = 0; i < block; i++) {
- // important : i * size would overflow as a 32-bit number in some
- // cases.
- startPos[i] = ((uint64_t)i * size) >> blockBits;
- }
-
- uint64_t maskblock = block - 1;
- for (uint32_t i = 0; i < size; i++) {
- uint64_t hash = binary_fuse_murmur64(keys[i] + filter->Seed);
- uint64_t segment_index = hash >> (64 - blockBits);
- while (reverseOrder[startPos[segment_index]] != 0) {
- segment_index++;
- segment_index &= maskblock;
- }
- reverseOrder[startPos[segment_index]] = hash;
- startPos[segment_index]++;
- }
- int error = 0;
- uint32_t duplicates = 0;
- for (uint32_t i = 0; i < size; i++) {
- uint64_t hash = reverseOrder[i];
- uint32_t h0 = binary_fuse16_hash(0, hash, filter);
- t2count[h0] += 4;
- t2hash[h0] ^= hash;
- uint32_t h1= binary_fuse16_hash(1, hash, filter);
- t2count[h1] += 4;
- t2count[h1] ^= 1;
- t2hash[h1] ^= hash;
- uint32_t h2 = binary_fuse16_hash(2, hash, filter);
- t2count[h2] += 4;
- t2hash[h2] ^= hash;
- t2count[h2] ^= 2;
- if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) {
- if (((t2hash[h0] == 0) && (t2count[h0] == 8))
- || ((t2hash[h1] == 0) && (t2count[h1] == 8))
- || ((t2hash[h2] == 0) && (t2count[h2] == 8))) {
- duplicates += 1;
- t2count[h0] -= 4;
- t2hash[h0] ^= hash;
- t2count[h1] -= 4;
- t2count[h1] ^= 1;
- t2hash[h1] ^= hash;
- t2count[h2] -= 4;
- t2count[h2] ^= 2;
- t2hash[h2] ^= hash;
- }
- }
- error = (t2count[h0] < 4) ? 1 : error;
- error = (t2count[h1] < 4) ? 1 : error;
- error = (t2count[h2] < 4) ? 1 : error;
- }
- if(error) {
- memset(reverseOrder, 0, sizeof(uint64_t) * size);
- memset(t2count, 0, sizeof(uint8_t) * capacity);
- memset(t2hash, 0, sizeof(uint64_t) * capacity);
- filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
- continue;
- }
-
- // End of key addition
- uint32_t Qsize = 0;
- // Add sets with one key to the queue.
- for (uint32_t i = 0; i < capacity; i++) {
- alone[Qsize] = i;
- Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0;
- }
- uint32_t stacksize = 0;
- while (Qsize > 0) {
- Qsize--;
- uint32_t index = alone[Qsize];
- if ((t2count[index] >> 2) == 1) {
- uint64_t hash = t2hash[index];
-
- //h012[0] = binary_fuse16_hash(0, hash, filter);
- h012[1] = binary_fuse16_hash(1, hash, filter);
- h012[2] = binary_fuse16_hash(2, hash, filter);
- h012[3] = binary_fuse16_hash(0, hash, filter); // == h012[0];
- h012[4] = h012[1];
- uint8_t found = t2count[index] & 3;
- reverseH[stacksize] = found;
- reverseOrder[stacksize] = hash;
- stacksize++;
- uint32_t other_index1 = h012[found + 1];
- alone[Qsize] = other_index1;
- Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0);
-
- t2count[other_index1] -= 4;
- t2count[other_index1] ^= binary_fuse_mod3(found + 1);
- t2hash[other_index1] ^= hash;
-
- uint32_t other_index2 = h012[found + 2];
- alone[Qsize] = other_index2;
- Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0);
- t2count[other_index2] -= 4;
- t2count[other_index2] ^= binary_fuse_mod3(found + 2);
- t2hash[other_index2] ^= hash;
- }
- }
- if (stacksize + duplicates == size) {
- // success
- size = stacksize;
- break;
- }
- memset(reverseOrder, 0, sizeof(uint64_t) * size);
- memset(t2count, 0, sizeof(uint8_t) * capacity);
- memset(t2hash, 0, sizeof(uint64_t) * capacity);
- filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
- }
-
- for (uint32_t i = size - 1; i < size; i--) {
- // the hash of the key we insert next
- uint64_t hash = reverseOrder[i];
- uint16_t xor2 = binary_fuse16_fingerprint(hash);
- uint8_t found = reverseH[i];
- h012[0] = binary_fuse16_hash(0, hash, filter);
- h012[1] = binary_fuse16_hash(1, hash, filter);
- h012[2] = binary_fuse16_hash(2, hash, filter);
- h012[3] = h012[0];
- h012[4] = h012[1];
- filter->Fingerprints[h012[found]] = xor2 ^
- filter->Fingerprints[h012[found + 1]] ^
- filter->Fingerprints[h012[found + 2]];
- }
- free(alone);
- free(t2count);
- free(reverseH);
- free(t2hash);
- free(reverseOrder);
- free(startPos);
- return true;
-}
-
-
-
-
-#endif
diff --git a/src/lib/third_party/src/hll/MurmurHash3.c b/src/lib/third_party/src/hll/MurmurHash3.c
index cf395f52b..f8bf0d881 100644
--- a/src/lib/third_party/src/hll/MurmurHash3.c
+++ b/src/lib/third_party/src/hll/MurmurHash3.c
@@ -1,13 +1,13 @@
/*-----------------------------------------------------------------------------
MurmurHash3 was written by Austin Appleby, and is placed in the public
domain. The author hereby disclaims copyright to this source code.
-*/
+xo*/
#include "MurmurHash3.h"
#define ROTL32(x, r) ((x) << (r)) | ((x) >> (32 - (r)))
-u_int32_t MurmurHash3_x86_32(const void *key, u_int32_t len, u_int32_t seed) {
+u_int32_t MurmurHash(const void *key, u_int32_t len, u_int32_t seed) {
const u_int8_t *data = (const u_int8_t *)key;
const int32_t nblocks = (int32_t)len / 4;
diff --git a/src/lib/third_party/src/hll/hll.c b/src/lib/third_party/src/hll/hll.c
index 344971b61..c4ed59885 100644
--- a/src/lib/third_party/src/hll/hll.c
+++ b/src/lib/third_party/src/hll/hll.c
@@ -117,7 +117,7 @@ static __inline int _hll_add_hash(struct ndpi_hll *hll, u_int32_t hash) {
/* Return: 0 = nothing changed, 1 = ranking changed */
int hll_add(struct ndpi_hll *hll, const void *buf, size_t size) {
- u_int32_t hash = MurmurHash3_x86_32((const char *)buf, (u_int32_t)size, 0x5f61767a);
+ u_int32_t hash = MurmurHash((const char *)buf, (u_int32_t)size, 0x5f61767a);
return(_hll_add_hash(hll, hash));
}