diff options
author | Luca Deri <deri@ntop.org> | 2023-09-05 00:21:16 +0200 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2023-09-05 00:21:16 +0200 |
commit | c5ebd84fd80d75dfdbd93fceb3e2d70a54328a47 (patch) | |
tree | 564f00b4f2cf98d28119c8fb991cf39b8ca32386 /src | |
parent | 4f2ce2d43b24bd86eabbed6127c090b3affa0d01 (diff) |
Added ndpi_bitmap64 support
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h | 16 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 1 | ||||
-rw-r--r-- | src/lib/ndpi_binary_bitmap.c | 1 | ||||
-rw-r--r-- | src/lib/ndpi_bitmap64.c | 79 | ||||
-rw-r--r-- | src/lib/ndpi_domain_classify.c | 33 | ||||
-rw-r--r-- | src/lib/ndpi_hash.c | 15 | ||||
-rw-r--r-- | src/lib/third_party/include/binaryfusefilter.h | 726 |
7 files changed, 852 insertions, 19 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index 82c7a307b..76ff9fadd 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -1801,11 +1801,12 @@ extern "C" { /* ******************************* */ u_int32_t ndpi_quick_hash(unsigned char *str, u_int str_len); + u_int64_t ndpi_quick_hash64(char *str, u_int str_len); u_int32_t ndpi_hash_string(char *str); u_int32_t ndpi_rev_hash_string(char *str); u_int32_t ndpi_hash_string_len(char *str, u_int len); u_int32_t ndpi_murmur_hash(char *str, u_int str_len); - + /* ******************************* */ int ndpi_des_init(struct ndpi_des_struct *des, double alpha, double beta, float significance); @@ -2030,6 +2031,19 @@ extern "C" { bool ndpi_bitmap_iterator_next(ndpi_bitmap_iterator* i, u_int32_t *value); /* ******************************* */ + + /* + Bitmap with 64 bit values based + on https://github.com/FastFilter/xor_singleheader/tree/master + */ + + ndpi_bitmap64* ndpi_bitmap64_alloc_size(u_int32_t size); + void ndpi_bitmap64_free(ndpi_bitmap64* b); + void ndpi_bitmap64_set(ndpi_bitmap64* b, u_int64_t value); + bool ndpi_bitmap64_isset(ndpi_bitmap64* b, u_int64_t value); + u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *b); + + /* ******************************* */ /* Bloom-filter on steroids based on ndpi_bitmap diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 142eed0a2..db2e53a3f 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -1176,6 +1176,7 @@ typedef struct ndpi_proto { #define CUSTOM_CATEGORY_LABEL_LEN 32 typedef void ndpi_bitmap; +typedef void ndpi_bitmap64; typedef void ndpi_bitmap_iterator; typedef void ndpi_filter; diff --git a/src/lib/ndpi_binary_bitmap.c b/src/lib/ndpi_binary_bitmap.c index 79f241ae9..9da67e5fb 100644 --- a/src/lib/ndpi_binary_bitmap.c +++ b/src/lib/ndpi_binary_bitmap.c @@ -146,6 +146,7 @@ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t * ndpi_binary_bitmap_entry_compare); if(rc != NULL) *out_category = rc->category; + return(rc == NULL ? false : true); } else return(false); diff --git a/src/lib/ndpi_bitmap64.c b/src/lib/ndpi_bitmap64.c new file mode 100644 index 000000000..75351af9a --- /dev/null +++ b/src/lib/ndpi_bitmap64.c @@ -0,0 +1,79 @@ +/* + * ndpi_bitmap.c + * + * Copyright (C) 2011-23 - ntop.org and contributors + * + * This file is part of nDPI, an open source deep packet inspection + * library based on the OpenDPI and PACE technology by ipoque GmbH + * + * nDPI is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * nDPI is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with nDPI. If not, see <http://www.gnu.org/licenses/>. + * + */ + + +#include <stdlib.h> +#include <errno.h> +#include <math.h> +#include <sys/types.h> + +#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN + +#include "ndpi_config.h" +#include "ndpi_api.h" +#include "ndpi_includes.h" +#include "ndpi_encryption.h" + +#include "third_party/include/binaryfusefilter.h" + +/* ******************************************* */ + +ndpi_bitmap64* ndpi_bitmap64_alloc_size(u_int32_t num_items) { + binary_fuse16_t *b = (binary_fuse16_t*)ndpi_malloc(sizeof(binary_fuse16_t)); + + if(b == NULL) return(NULL); + + if(binary_fuse16_allocate(num_items, b)) + return((ndpi_bitmap64*)b); + else { + ndpi_free(b); + return(NULL); + } +} + +/* ******************************************* */ + +void ndpi_bitmap64_free(ndpi_bitmap64* b) { + binary_fuse16_free((binary_fuse16_t*)b); + ndpi_free(b); +} + +/* ******************************************* */ + +void ndpi_bitmap64_set(ndpi_bitmap64* b, u_int64_t value) { + binary_fuse16_populate(&value, 1, (binary_fuse16_t*)b); +} + +/* ******************************************* */ + +bool ndpi_bitmap64_isset(ndpi_bitmap64* b, u_int64_t value) { + return(binary_fuse16_contain(value, (binary_fuse16_t*)b)); +} + +/* ******************************************* */ + +u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *b) { + return(binary_fuse16_size_in_bytes((binary_fuse16_t*)b)); +} + + diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c index 3b458d665..228c7dbc0 100644 --- a/src/lib/ndpi_domain_classify.c +++ b/src/lib/ndpi_domain_classify.c @@ -27,10 +27,12 @@ #include "ndpi_config.h" #include "ndpi_api.h" -// #define DEBUG_ADD -// #define DEBUG_CONTAINS +#if 0 +#define DEBUG_ADD +#define DEBUG_CONTAINS +#endif -// #define USE_BINARY_BITMAP +#define USE_BINARY_BITMAP #ifdef USE_BINARY_BITMAP @@ -72,13 +74,9 @@ u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *c) { bool ndpi_domain_classify_add(ndpi_domain_classify *c, u_int8_t class_id, char *domain) { - u_int64_t hash1, hash2, hash; + u_int64_t hash; char *dot = strrchr(domain, '.'); -#ifdef DEBUG_ADD - printf("[add] Trying to add %s\n", domain); -#endif - if(!dot) return(false); if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); @@ -86,15 +84,15 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *c, /* Skip heading dots */ while(domain[0] == '.') domain++; - hash1 = ndpi_hash_string(domain), hash2 = ndpi_rev_hash_string(domain); - hash = (hash1 << 32) | hash2; + hash = ndpi_quick_hash64(domain, strlen(domain)); #ifdef DEBUG_ADD printf("[add] %s @ %u [hash: %llu]\n", domain, class_id, hash); - if(ndpi_binary_bitmap_isset(c->bitmap, hash, class_id)) { +#if 0 + if(ndpi_binary_bitmap_isset(c->bitmap, hash, &class_id)) printf("[add] False positive %s @ %u [hash: %llu]\n", domain, class_id, hash); - } +#endif #endif return(ndpi_binary_bitmap_set(c->bitmap, hash, class_id)); @@ -184,11 +182,10 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *c, elem = domain; while(true) { - u_int64_t hash1, hash2, hash; - - hash1 = ndpi_hash_string(elem), hash2 = ndpi_rev_hash_string(elem); - hash = (hash1 << 32) | hash2; + u_int64_t hash; + hash = ndpi_quick_hash64(elem, strlen(elem)); + #ifdef DEBUG_CONTAINS printf("[contains] Searching %s [hash: %llu]\n", elem, hash); #endif @@ -314,7 +311,7 @@ static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) { if(elem[0] == '.') elem = &elem[1]; h = ndpi_hash_string(elem); - + if(elem == domain) { /* We're adding the beginning of the domain, hence the last token before quitting */ h += END_OF_TOKENS_DELIMITER; @@ -368,7 +365,7 @@ static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain if(elem[0] == '.') elem = &elem[1]; h = ndpi_hash_string(elem); - + if(!ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) { /* Exact match does not work, so let's see if a partial match works instead */ diff --git a/src/lib/ndpi_hash.c b/src/lib/ndpi_hash.c index 8d78749fd..8dd7beab0 100644 --- a/src/lib/ndpi_hash.c +++ b/src/lib/ndpi_hash.c @@ -48,6 +48,21 @@ u_int32_t ndpi_quick_hash(unsigned char *str, u_int str_len) { /* ******************************************************************** */ +/* Based on Daniel Lemire code */ +u_int64_t ndpi_quick_hash64(char *str, u_int str_len) { + u_int64_t h = 0; + u_int i; + + for(i=0; i<str_len; i++) + h = (h * 177) + str[i]; + + h ^= strlen(str); + + return h; +} + +/* ******************************************************************** */ + /* https://en.wikipedia.org/wiki/Jenkins_hash_function diff --git a/src/lib/third_party/include/binaryfusefilter.h b/src/lib/third_party/include/binaryfusefilter.h new file mode 100644 index 000000000..e884bdb79 --- /dev/null +++ b/src/lib/third_party/include/binaryfusefilter.h @@ -0,0 +1,726 @@ +/* https://github.com/FastFilter/xor_singleheader */ +#ifndef BINARYFUSEFILTER_H +#define BINARYFUSEFILTER_H +#include <math.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef XOR_MAX_ITERATIONS +#define XOR_MAX_ITERATIONS \ + 100 // probability of success should always be > 0.5 so 100 iterations is + // highly unlikely +#endif + +/** + * We start with a few utilities. + ***/ +static inline uint64_t binary_fuse_murmur64(uint64_t h) { + h ^= h >> 33; + h *= UINT64_C(0xff51afd7ed558ccd); + h ^= h >> 33; + h *= UINT64_C(0xc4ceb9fe1a85ec53); + h ^= h >> 33; + return h; +} +static inline uint64_t binary_fuse_mix_split(uint64_t key, uint64_t seed) { + return binary_fuse_murmur64(key + seed); +} +static inline uint64_t binary_fuse_rotl64(uint64_t n, unsigned int c) { + return (n << (c & 63)) | (n >> ((-c) & 63)); +} +static inline uint32_t binary_fuse_reduce(uint32_t hash, uint32_t n) { + // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + return (uint32_t)(((uint64_t)hash * n) >> 32); +} +static inline uint64_t binary_fuse8_fingerprint(uint64_t hash) { + return hash ^ (hash >> 32); +} + +/** + * We need a decent random number generator. + **/ + +// returns random number, modifies the seed +static inline uint64_t binary_fuse_rng_splitmix64(uint64_t *seed) { + uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15)); + z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); + return z ^ (z >> 31); +} + +typedef struct binary_fuse8_s { + uint64_t Seed; + uint32_t SegmentLength; + uint32_t SegmentLengthMask; + uint32_t SegmentCount; + uint32_t SegmentCountLength; + uint32_t ArrayLength; + uint8_t *Fingerprints; +} binary_fuse8_t; + +// #ifdefs adapted from: +// https://stackoverflow.com/a/50958815 +#ifdef __SIZEOF_INT128__ // compilers supporting __uint128, e.g., gcc, clang +static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { + return ((__uint128_t)a * b) >> 64; +} +#elif defined(_M_X64) || defined(_MARM64) // MSVC +static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { + return __umulh(a, b); +} +#elif defined(_M_IA64) // also MSVC +static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { + unsigned __int64 hi; + (void) _umul128(a, b, &hi); + return hi; +} +#else // portable implementation using uint64_t +static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { + // Adapted from: + // https://stackoverflow.com/a/51587262 + + /* + This is implementing schoolbook multiplication: + + a1 a0 + X b1 b0 + ------------- + 00 LOW PART + ------------- + 00 + 10 10 MIDDLE PART + + 01 + ------------- + 01 + + 11 11 HIGH PART + ------------- + */ + + const uint64_t a0 = (uint32_t) a; + const uint64_t a1 = a >> 32; + const uint64_t b0 = (uint32_t) b; + const uint64_t b1 = b >> 32; + const uint64_t p11 = a1 * b1; + const uint64_t p01 = a0 * b1; + const uint64_t p10 = a1 * b0; + const uint64_t p00 = a0 * b0; + + // 64-bit product + two 32-bit values + const uint64_t middle = p10 + (p00 >> 32) + (uint32_t) p01; + + /* + Proof that 64-bit products can accumulate two more 32-bit values + without overflowing: + + Max 32-bit value is 2^32 - 1. + PSum = (2^32-1) * (2^32-1) + (2^32-1) + (2^32-1) + = 2^64 - 2^32 - 2^32 + 1 + 2^32 - 1 + 2^32 - 1 + = 2^64 - 1 + Therefore the high half below cannot overflow regardless of input. + */ + + // high half + return p11 + (middle >> 32) + (p01 >> 32); + + // low half (which we don't care about, but here it is) + // (middle << 32) | (uint32_t) p00; +} +#endif + +typedef struct binary_hashes_s { + uint32_t h0; + uint32_t h1; + uint32_t h2; +} binary_hashes_t; + +static inline binary_hashes_t binary_fuse8_hash_batch(uint64_t hash, + const binary_fuse8_t *filter) { + uint64_t hi = binary_fuse_mulhi(hash, filter->SegmentCountLength); + binary_hashes_t ans; + ans.h0 = (uint32_t)hi; + ans.h1 = ans.h0 + filter->SegmentLength; + ans.h2 = ans.h1 + filter->SegmentLength; + ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; + ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; + return ans; +} + +static inline uint32_t binary_fuse8_hash(int index, uint64_t hash, + const binary_fuse8_t *filter) { + uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength); + h += index * filter->SegmentLength; + // keep the lower 36 bits + uint64_t hh = hash & ((1UL << 36) - 1); + // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift + h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); + return h; +} + +// Report if the key is in the set, with false positive rate. +static inline bool binary_fuse8_contain(uint64_t key, + const binary_fuse8_t *filter) { + uint64_t hash = binary_fuse_mix_split(key, filter->Seed); + uint8_t f = binary_fuse8_fingerprint(hash); + binary_hashes_t hashes = binary_fuse8_hash_batch(hash, filter); + f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ + filter->Fingerprints[hashes.h2]; + return f == 0; +} + +static inline uint32_t binary_fuse_calculate_segment_length(uint32_t arity, + uint32_t size) { + // These parameters are very sensitive. Replacing 'floor' by 'round' can + // substantially affect the construction time. + if (arity == 3) { + return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(3.33) + 2.25)); + } else if (arity == 4) { + return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(2.91) - 0.5)); + } else { + return 65536; + } +} + +static inline double binary_fuse_max(double a, double b) { + if (a < b) { + return b; + } + return a; +} + +static inline double binary_fuse_calculate_size_factor(uint32_t arity, + uint32_t size) { + if (arity == 3) { + return binary_fuse_max(1.125, 0.875 + 0.25 * log(1000000.0) / log((double)size)); + } else if (arity == 4) { + return binary_fuse_max(1.075, 0.77 + 0.305 * log(600000.0) / log((double)size)); + } else { + return 2.0; + } +} + +// allocate enough capacity for a set containing up to 'size' elements +// caller is responsible to call binary_fuse8_free(filter) +// size should be at least 2. +static inline bool binary_fuse8_allocate(uint32_t size, + binary_fuse8_t *filter) { + uint32_t arity = 3; + filter->SegmentLength = size == 0 ? 4 : binary_fuse_calculate_segment_length(arity, size); + if (filter->SegmentLength > 262144) { + filter->SegmentLength = 262144; + } + filter->SegmentLengthMask = filter->SegmentLength - 1; + double sizeFactor = binary_fuse_calculate_size_factor(arity, size); + uint32_t capacity = size <= 1 ? 0 : (uint32_t)(round((double)size * sizeFactor)); + uint32_t initSegmentCount = + (capacity + filter->SegmentLength - 1) / filter->SegmentLength - + (arity - 1); + filter->ArrayLength = (initSegmentCount + arity - 1) * filter->SegmentLength; + filter->SegmentCount = + (filter->ArrayLength + filter->SegmentLength - 1) / filter->SegmentLength; + if (filter->SegmentCount <= arity - 1) { + filter->SegmentCount = 1; + } else { + filter->SegmentCount = filter->SegmentCount - (arity - 1); + } + filter->ArrayLength = + (filter->SegmentCount + arity - 1) * filter->SegmentLength; + filter->SegmentCountLength = filter->SegmentCount * filter->SegmentLength; + filter->Fingerprints = (uint8_t*)ndpi_malloc(filter->ArrayLength); + return filter->Fingerprints != NULL; +} + +// report memory usage +static inline size_t binary_fuse8_size_in_bytes(const binary_fuse8_t *filter) { + return filter->ArrayLength * sizeof(uint8_t) + sizeof(binary_fuse8_t); +} + +// release memory +static inline void binary_fuse8_free(binary_fuse8_t *filter) { + ndpi_free(filter->Fingerprints); + filter->Fingerprints = NULL; + filter->Seed = 0; + filter->SegmentLength = 0; + filter->SegmentLengthMask = 0; + filter->SegmentCount = 0; + filter->SegmentCountLength = 0; + filter->ArrayLength = 0; +} + +static inline uint8_t binary_fuse_mod3(uint8_t x) { + return x > 2 ? x - 3 : x; +} + +// Construct the filter, returns true on success, false on failure. +// The algorithm fails when there is insufficient memory. +// The caller is responsable for calling binary_fuse8_allocate(size,filter) +// before. For best performance, the caller should ensure that there are not too +// many duplicated keys. +static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size, + binary_fuse8_t *filter) { + uint64_t rng_counter = 0x726b2b9d438b9d4d; + filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); + uint64_t *reverseOrder = (uint64_t *)ndpi_calloc((size + 1), sizeof(uint64_t)); + uint32_t capacity = filter->ArrayLength; + uint32_t *alone = (uint32_t *)ndpi_malloc(capacity * sizeof(uint32_t)); + uint8_t *t2count = (uint8_t *)ndpi_calloc(capacity, sizeof(uint8_t)); + uint8_t *reverseH = (uint8_t *)ndpi_malloc(size * sizeof(uint8_t)); + uint64_t *t2hash = (uint64_t *)ndpi_calloc(capacity, sizeof(uint64_t)); + + uint32_t blockBits = 1; + while (((uint32_t)1 << blockBits) < filter->SegmentCount) { + blockBits += 1; + } + uint32_t block = ((uint32_t)1 << blockBits); + uint32_t *startPos = (uint32_t *)ndpi_malloc((1 << blockBits) * sizeof(uint32_t)); + uint32_t h012[5]; + + if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) || + (t2hash == NULL) || (reverseOrder == NULL) || (startPos == NULL)) { + ndpi_free(alone); + ndpi_free(t2count); + ndpi_free(reverseH); + ndpi_free(t2hash); + ndpi_free(reverseOrder); + ndpi_free(startPos); + return false; + } + reverseOrder[size] = 1; + for (int loop = 0; true; ++loop) { + if (loop + 1 > XOR_MAX_ITERATIONS) { + // The probability of this happening is lower than the + // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), + // but if it happens, we just fill the fingerprint with ones which + // will flag all possible keys as 'possible', ensuring a correct result. + memset(filter->Fingerprints, ~0, filter->ArrayLength); + ndpi_free(alone); + ndpi_free(t2count); + ndpi_free(reverseH); + ndpi_free(t2hash); + ndpi_free(reverseOrder); + ndpi_free(startPos); + return true; + } + + for (uint32_t i = 0; i < block; i++) { + // important : i * size would overflow as a 32-bit number in some + // cases. + startPos[i] = ((uint64_t)i * size) >> blockBits; + } + + uint64_t maskblock = block - 1; + for (uint32_t i = 0; i < size; i++) { + uint64_t hash = binary_fuse_murmur64(keys[i] + filter->Seed); + uint64_t segment_index = hash >> (64 - blockBits); + while (reverseOrder[startPos[segment_index]] != 0) { + segment_index++; + segment_index &= maskblock; + } + reverseOrder[startPos[segment_index]] = hash; + startPos[segment_index]++; + } + int error = 0; + uint32_t duplicates = 0; + for (uint32_t i = 0; i < size; i++) { + uint64_t hash = reverseOrder[i]; + uint32_t h0 = binary_fuse8_hash(0, hash, filter); + t2count[h0] += 4; + t2hash[h0] ^= hash; + uint32_t h1= binary_fuse8_hash(1, hash, filter); + t2count[h1] += 4; + t2count[h1] ^= 1; + t2hash[h1] ^= hash; + uint32_t h2 = binary_fuse8_hash(2, hash, filter); + t2count[h2] += 4; + t2hash[h2] ^= hash; + t2count[h2] ^= 2; + if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { + if (((t2hash[h0] == 0) && (t2count[h0] == 8)) + || ((t2hash[h1] == 0) && (t2count[h1] == 8)) + || ((t2hash[h2] == 0) && (t2count[h2] == 8))) { + duplicates += 1; + t2count[h0] -= 4; + t2hash[h0] ^= hash; + t2count[h1] -= 4; + t2count[h1] ^= 1; + t2hash[h1] ^= hash; + t2count[h2] -= 4; + t2count[h2] ^= 2; + t2hash[h2] ^= hash; + } + } + error = (t2count[h0] < 4) ? 1 : error; + error = (t2count[h1] < 4) ? 1 : error; + error = (t2count[h2] < 4) ? 1 : error; + } + if(error) { + memset(reverseOrder, 0, sizeof(uint64_t) * size); + memset(t2count, 0, sizeof(uint8_t) * capacity); + memset(t2hash, 0, sizeof(uint64_t) * capacity); + filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); + continue; + } + + // End of key addition + uint32_t Qsize = 0; + // Add sets with one key to the queue. + for (uint32_t i = 0; i < capacity; i++) { + alone[Qsize] = i; + Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0; + } + uint32_t stacksize = 0; + while (Qsize > 0) { + Qsize--; + uint32_t index = alone[Qsize]; + if ((t2count[index] >> 2) == 1) { + uint64_t hash = t2hash[index]; + + //h012[0] = binary_fuse8_hash(0, hash, filter); + h012[1] = binary_fuse8_hash(1, hash, filter); + h012[2] = binary_fuse8_hash(2, hash, filter); + h012[3] = binary_fuse8_hash(0, hash, filter); // == h012[0]; + h012[4] = h012[1]; + uint8_t found = t2count[index] & 3; + reverseH[stacksize] = found; + reverseOrder[stacksize] = hash; + stacksize++; + uint32_t other_index1 = h012[found + 1]; + alone[Qsize] = other_index1; + Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0); + + t2count[other_index1] -= 4; + t2count[other_index1] ^= binary_fuse_mod3(found + 1); + t2hash[other_index1] ^= hash; + + uint32_t other_index2 = h012[found + 2]; + alone[Qsize] = other_index2; + Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); + t2count[other_index2] -= 4; + t2count[other_index2] ^= binary_fuse_mod3(found + 2); + t2hash[other_index2] ^= hash; + } + } + if (stacksize + duplicates == size) { + // success + size = stacksize; + break; + } + memset(reverseOrder, 0, sizeof(uint64_t) * size); + memset(t2count, 0, sizeof(uint8_t) * capacity); + memset(t2hash, 0, sizeof(uint64_t) * capacity); + filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); + } + + for (uint32_t i = size - 1; i < size; i--) { + // the hash of the key we insert next + uint64_t hash = reverseOrder[i]; + uint8_t xor2 = binary_fuse8_fingerprint(hash); + uint8_t found = reverseH[i]; + h012[0] = binary_fuse8_hash(0, hash, filter); + h012[1] = binary_fuse8_hash(1, hash, filter); + h012[2] = binary_fuse8_hash(2, hash, filter); + h012[3] = h012[0]; + h012[4] = h012[1]; + filter->Fingerprints[h012[found]] = xor2 ^ + filter->Fingerprints[h012[found + 1]] ^ + filter->Fingerprints[h012[found + 2]]; + } + ndpi_free(alone); + ndpi_free(t2count); + ndpi_free(reverseH); + ndpi_free(t2hash); + ndpi_free(reverseOrder); + ndpi_free(startPos); + return true; +} + +////////////////// +// fuse16 +////////////////// + +typedef struct binary_fuse16_s { + uint64_t Seed; + uint32_t SegmentLength; + uint32_t SegmentLengthMask; + uint32_t SegmentCount; + uint32_t SegmentCountLength; + uint32_t ArrayLength; + uint16_t *Fingerprints; +} binary_fuse16_t; + +static inline uint64_t binary_fuse16_fingerprint(uint64_t hash) { + return hash ^ (hash >> 32); +} + +static inline binary_hashes_t binary_fuse16_hash_batch(uint64_t hash, + const binary_fuse16_t *filter) { + uint64_t hi = binary_fuse_mulhi(hash, filter->SegmentCountLength); + binary_hashes_t ans; + ans.h0 = (uint32_t)hi; + ans.h1 = ans.h0 + filter->SegmentLength; + ans.h2 = ans.h1 + filter->SegmentLength; + ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; + ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; + return ans; +} +static inline uint32_t binary_fuse16_hash(int index, uint64_t hash, + const binary_fuse16_t *filter) { + uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength); + h += index * filter->SegmentLength; + // keep the lower 36 bits + uint64_t hh = hash & ((1UL << 36) - 1); + // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift + h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); + return h; +} + +// Report if the key is in the set, with false positive rate. +static inline bool binary_fuse16_contain(uint64_t key, + const binary_fuse16_t *filter) { + uint64_t hash = binary_fuse_mix_split(key, filter->Seed); + uint16_t f = binary_fuse16_fingerprint(hash); + binary_hashes_t hashes = binary_fuse16_hash_batch(hash, filter); + f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ + filter->Fingerprints[hashes.h2]; + return f == 0; +} + + +// allocate enough capacity for a set containing up to 'size' elements +// caller is responsible to call binary_fuse16_free(filter) +// size should be at least 2. +static inline bool binary_fuse16_allocate(uint32_t size, + binary_fuse16_t *filter) { + uint32_t arity = 3; + filter->SegmentLength = size == 0 ? 4 : binary_fuse_calculate_segment_length(arity, size); + if (filter->SegmentLength > 262144) { + filter->SegmentLength = 262144; + } + filter->SegmentLengthMask = filter->SegmentLength - 1; + double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size); + uint32_t capacity = (uint32_t)(round((double)size * sizeFactor)); + uint32_t initSegmentCount = + (capacity + filter->SegmentLength - 1) / filter->SegmentLength - + (arity - 1); + filter->ArrayLength = (initSegmentCount + arity - 1) * filter->SegmentLength; + filter->SegmentCount = + (filter->ArrayLength + filter->SegmentLength - 1) / filter->SegmentLength; + if (filter->SegmentCount <= arity - 1) { + filter->SegmentCount = 1; + } else { + filter->SegmentCount = filter->SegmentCount - (arity - 1); + } + filter->ArrayLength = + (filter->SegmentCount + arity - 1) * filter->SegmentLength; + filter->SegmentCountLength = filter->SegmentCount * filter->SegmentLength; + filter->Fingerprints = (uint16_t*)ndpi_malloc(filter->ArrayLength * sizeof(uint16_t)); + return filter->Fingerprints != NULL; +} + +// report memory usage +static inline size_t binary_fuse16_size_in_bytes(const binary_fuse16_t *filter) { + return filter->ArrayLength * sizeof(uint16_t) + sizeof(binary_fuse16_t); +} + +// release memory +static inline void binary_fuse16_free(binary_fuse16_t *filter) { + ndpi_free(filter->Fingerprints); + filter->Fingerprints = NULL; + filter->Seed = 0; + filter->SegmentLength = 0; + filter->SegmentLengthMask = 0; + filter->SegmentCount = 0; + filter->SegmentCountLength = 0; + filter->ArrayLength = 0; +} + + +// Construct the filter, returns true on success, false on failure. +// The algorithm fails when there is insufficient memory. +// The caller is responsable for calling binary_fuse8_allocate(size,filter) +// before. For best performance, the caller should ensure that there are not too +// many duplicated keys. +static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size, + binary_fuse16_t *filter) { + uint64_t rng_counter = 0x726b2b9d438b9d4d; + filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); + uint64_t *reverseOrder = (uint64_t *)ndpi_calloc((size + 1), sizeof(uint64_t)); + uint32_t capacity = filter->ArrayLength; + uint32_t *alone = (uint32_t *)ndpi_malloc(capacity * sizeof(uint32_t)); + uint8_t *t2count = (uint8_t *)ndpi_calloc(capacity, sizeof(uint8_t)); + uint8_t *reverseH = (uint8_t *)ndpi_malloc(size * sizeof(uint8_t)); + uint64_t *t2hash = (uint64_t *)ndpi_calloc(capacity, sizeof(uint64_t)); + + uint32_t blockBits = 1; + while (((uint32_t)1 << blockBits) < filter->SegmentCount) { + blockBits += 1; + } + uint32_t block = ((uint32_t)1 << blockBits); + uint32_t *startPos = (uint32_t *)ndpi_malloc((1 << blockBits) * sizeof(uint32_t)); + uint32_t h012[5]; + + if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) || + (t2hash == NULL) || (reverseOrder == NULL) || (startPos == NULL)) { + ndpi_free(alone); + ndpi_free(t2count); + ndpi_free(reverseH); + ndpi_free(t2hash); + ndpi_free(reverseOrder); + ndpi_free(startPos); + return false; + } + reverseOrder[size] = 1; + for (int loop = 0; true; ++loop) { + if (loop + 1 > XOR_MAX_ITERATIONS) { + // The probability of this happening is lower than the + // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), + // but if it happens, we just fill the fingerprint with ones which + // will flag all possible keys as 'possible', ensuring a correct result. + memset(filter->Fingerprints, ~0, filter->ArrayLength * sizeof(uint16_t)); + ndpi_free(alone); + ndpi_free(t2count); + ndpi_free(reverseH); + ndpi_free(t2hash); + ndpi_free(reverseOrder); + ndpi_free(startPos); + return true; + } + + for (uint32_t i = 0; i < block; i++) { + // important : i * size would overflow as a 32-bit number in some + // cases. + startPos[i] = ((uint64_t)i * size) >> blockBits; + } + + uint64_t maskblock = block - 1; + for (uint32_t i = 0; i < size; i++) { + uint64_t hash = binary_fuse_murmur64(keys[i] + filter->Seed); + uint64_t segment_index = hash >> (64 - blockBits); + while (reverseOrder[startPos[segment_index]] != 0) { + segment_index++; + segment_index &= maskblock; + } + reverseOrder[startPos[segment_index]] = hash; + startPos[segment_index]++; + } + int error = 0; + uint32_t duplicates = 0; + for (uint32_t i = 0; i < size; i++) { + uint64_t hash = reverseOrder[i]; + uint32_t h0 = binary_fuse16_hash(0, hash, filter); + t2count[h0] += 4; + t2hash[h0] ^= hash; + uint32_t h1= binary_fuse16_hash(1, hash, filter); + t2count[h1] += 4; + t2count[h1] ^= 1; + t2hash[h1] ^= hash; + uint32_t h2 = binary_fuse16_hash(2, hash, filter); + t2count[h2] += 4; + t2hash[h2] ^= hash; + t2count[h2] ^= 2; + if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { + if (((t2hash[h0] == 0) && (t2count[h0] == 8)) + || ((t2hash[h1] == 0) && (t2count[h1] == 8)) + || ((t2hash[h2] == 0) && (t2count[h2] == 8))) { + duplicates += 1; + t2count[h0] -= 4; + t2hash[h0] ^= hash; + t2count[h1] -= 4; + t2count[h1] ^= 1; + t2hash[h1] ^= hash; + t2count[h2] -= 4; + t2count[h2] ^= 2; + t2hash[h2] ^= hash; + } + } + error = (t2count[h0] < 4) ? 1 : error; + error = (t2count[h1] < 4) ? 1 : error; + error = (t2count[h2] < 4) ? 1 : error; + } + if(error) { + memset(reverseOrder, 0, sizeof(uint64_t) * size); + memset(t2count, 0, sizeof(uint8_t) * capacity); + memset(t2hash, 0, sizeof(uint64_t) * capacity); + filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); + continue; + } + + // End of key addition + uint32_t Qsize = 0; + // Add sets with one key to the queue. + for (uint32_t i = 0; i < capacity; i++) { + alone[Qsize] = i; + Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0; + } + uint32_t stacksize = 0; + while (Qsize > 0) { + Qsize--; + uint32_t index = alone[Qsize]; + if ((t2count[index] >> 2) == 1) { + uint64_t hash = t2hash[index]; + + //h012[0] = binary_fuse16_hash(0, hash, filter); + h012[1] = binary_fuse16_hash(1, hash, filter); + h012[2] = binary_fuse16_hash(2, hash, filter); + h012[3] = binary_fuse16_hash(0, hash, filter); // == h012[0]; + h012[4] = h012[1]; + uint8_t found = t2count[index] & 3; + reverseH[stacksize] = found; + reverseOrder[stacksize] = hash; + stacksize++; + uint32_t other_index1 = h012[found + 1]; + alone[Qsize] = other_index1; + Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0); + + t2count[other_index1] -= 4; + t2count[other_index1] ^= binary_fuse_mod3(found + 1); + t2hash[other_index1] ^= hash; + + uint32_t other_index2 = h012[found + 2]; + alone[Qsize] = other_index2; + Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); + t2count[other_index2] -= 4; + t2count[other_index2] ^= binary_fuse_mod3(found + 2); + t2hash[other_index2] ^= hash; + } + } + if (stacksize + duplicates == size) { + // success + size = stacksize; + break; + } + memset(reverseOrder, 0, sizeof(uint64_t) * size); + memset(t2count, 0, sizeof(uint8_t) * capacity); + memset(t2hash, 0, sizeof(uint64_t) * capacity); + filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); + } + + for (uint32_t i = size - 1; i < size; i--) { + // the hash of the key we insert next + uint64_t hash = reverseOrder[i]; + uint16_t xor2 = binary_fuse16_fingerprint(hash); + uint8_t found = reverseH[i]; + h012[0] = binary_fuse16_hash(0, hash, filter); + h012[1] = binary_fuse16_hash(1, hash, filter); + h012[2] = binary_fuse16_hash(2, hash, filter); + h012[3] = h012[0]; + h012[4] = h012[1]; + filter->Fingerprints[h012[found]] = xor2 ^ + filter->Fingerprints[h012[found + 1]] ^ + filter->Fingerprints[h012[found + 2]]; + } + ndpi_free(alone); + ndpi_free(t2count); + ndpi_free(reverseH); + ndpi_free(t2hash); + ndpi_free(reverseOrder); + ndpi_free(startPos); + return true; +} + + + + +#endif |