diff options
Diffstat (limited to 'src/lib/ndpi_analyze.c')
-rw-r--r-- | src/lib/ndpi_analyze.c | 117 |
1 files changed, 116 insertions, 1 deletions
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c index d9828ad62..62d14fdd4 100644 --- a/src/lib/ndpi_analyze.c +++ b/src/lib/ndpi_analyze.c @@ -266,7 +266,7 @@ void ndpi_data_print_window_values(struct ndpi_analyze_struct *s) { for(i=0; i<n; i++) printf("[%u: %" PRIu64 "]", i, s->values[i]); - + printf("\n"); } } @@ -1715,3 +1715,118 @@ u_int32_t ndpi_crc32(const void* data, size_t n_bytes) { __crc32(data, n_bytes, &crc); return crc; } + +/* ********************************************************************************* */ +/* ********************************************************************************* */ + +/* + Count-Min Sketch: Memory Usage + + https://florian.github.io/count-min-sketch/ + https://medium.com/@nehasingh18.9/count-min-sketch-for-beginners-f1e441bbe7a4 + https://sites.google.com/site/countminsketch/code + + [Depth: 8][Total memory: 1040] + [Depth: 16][Total memory: 2064] + [Depth: 32][Total memory: 4112] + [Depth: 64][Total memory: 8208] + [Depth: 256][Total memory: 32784] + [Depth: 512][Total memory: 65552] + [Depth: 1024][Total memory: 131088] + [Depth: 2048][Total memory: 262160] + [Depth: 4096][Total memory: 524304] + [Depth: 8192][Total memory: 1048592] +*/ + +#define NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS 1024 + +// #define DEBUG + +struct ndpi_cm_sketch *ndpi_cm_sketch_init(u_int16_t num_hashes) { +#ifdef DEBUG + u_int32_t tot_mem; +#endif + u_int32_t len; + struct ndpi_cm_sketch *sketch; + + len = sizeof(struct ndpi_cm_sketch); + sketch = (struct ndpi_cm_sketch*)ndpi_malloc(len); + + if(!sketch) + return(NULL); + +#ifdef DEBUG + tot_mem = len; +#endif + + if(num_hashes < 2) num_hashes = 2; + + sketch->num_hashes = num_hashes; + sketch->num_hash_buckets = num_hashes * NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS; + sketch->num_hash_buckets = ndpi_nearest_power_of_two(sketch->num_hash_buckets)-1, + + len = num_hashes * NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS * sizeof(u_int32_t); + sketch->tables = (u_int32_t*)ndpi_calloc(num_hashes, NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS * sizeof(u_int32_t)); + +#ifdef DEBUG + tot_mem += len; +#endif + +#ifdef DEBUG + printf("[Num_Hashes: %u][Total memory: %u]\n", num_hashes, tot_mem); +#endif + + if(!sketch->tables) { + ndpi_free(sketch); + return(NULL); + } + + return(sketch); +} + +/* ********************************************************************************* */ + +#define ndpi_simple_hash(value, seed) (value * seed) + +/* ********************************************************************************* */ + +void ndpi_cm_sketch_add(struct ndpi_cm_sketch *sketch, u_int32_t element) { + u_int32_t idx; + + for(idx = 1; idx <= sketch->num_hashes; idx++) { + u_int32_t hashval = ndpi_simple_hash(element, idx) & sketch->num_hash_buckets; + + sketch->tables[hashval]++; + +#ifdef DEBUG + printf("ndpi_add_sketch_add() [hash: %d][num_hash_buckets: %u][hashval: %d][value: %d]\n", + idx, sketch->num_hash_buckets, hashval, sketch->tables[hashval]); +#endif + } +} + +/* ********************************************************************************* */ + +u_int32_t ndpi_cm_sketch_count(struct ndpi_cm_sketch *sketch, u_int32_t element) { + u_int32_t min_value = INT_MAX, idx; + + for(idx = 1; idx <= sketch->num_hashes; idx++) { + u_int32_t hashval = ndpi_simple_hash(element, idx) & sketch->num_hash_buckets; + +#ifdef DEBUG + printf("ndpi_add_sketch_add() [hash: %d][num_hash_buckets: %u][hashval: %d][value: %d]\n", + idx, sketch->num_hash_buckets, hashval, sketch->tables[hashval]); +#endif + + min_value = ndpi_min(min_value, sketch->tables[hashval]); + } + + return(min_value); +} + +/* ********************************************************************************* */ + +void ndpi_cm_sketch_destroy(struct ndpi_cm_sketch *sketch) { + ndpi_free(sketch->tables); + ndpi_free(sketch); +} |