diff options
author | Luca <deri@ntop.org> | 2023-12-28 19:59:54 +0100 |
---|---|---|
committer | Luca <deri@ntop.org> | 2023-12-28 19:59:54 +0100 |
commit | 2f657cb8f9001e9944033299758a150d8c0b4a2c (patch) | |
tree | 055f3352bec2ea995538945046d0b242b5b4b8d5 /src | |
parent | 1366518bff88210008159cc385f092bd5cfc6252 (diff) |
Implemented ndpi_is_outlier() for detecting outliers using z-score
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h | 23 | ||||
-rw-r--r-- | src/lib/ndpi_analyze.c | 36 |
2 files changed, 54 insertions, 5 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index 9200fcbf5..f834223c6 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -1948,6 +1948,29 @@ extern "C" { double ndpi_pearson_correlation(u_int32_t *values_a, u_int32_t *values_b, u_int16_t num_values); /* ******************************* */ + + /* + * Checks if a specified value is an outlier with respect to past values + * using the Z-score. + * + * @par past_values = List of observed past values (past knowledge) + * @par num_past_values = Number of observed past values + * @par value_to_check = The value to be checked with respect to past values + * @par threshold = Threshold on z-score. Typical values: + * t = 1 - The value to check should not exceed the past values + * t > 1 - The value to check has to be within (t * stddev) boundaries + * @par lower - [out] Lower threshold + * @par upper - [out] Upper threshold + * + * @return true if the specified value is an outlier, false otherwise + * + */ + + bool ndpi_is_outlier(u_int32_t *past_values, u_int32_t num_past_values, + u_int32_t value_to_check, float threshold, + float *lower, float *upper); + + /* ******************************* */ u_int32_t ndpi_quick_16_byte_hash(u_int8_t *in_16_bytes_long); diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c index 7a2fd495c..06d461561 100644 --- a/src/lib/ndpi_analyze.c +++ b/src/lib/ndpi_analyze.c @@ -71,7 +71,7 @@ struct ndpi_analyze_struct* ndpi_alloc_data_analysis_from_series(const u_int32_t struct ndpi_analyze_struct *ret = ndpi_alloc_data_analysis(num_values); if(ret == NULL) return(NULL); - + for(i=0; i<num_values; i++) ndpi_data_add_value(ret, (const u_int64_t)values[i]); @@ -1648,6 +1648,31 @@ u_int ndpi_find_outliers(u_int32_t *values, bool *outliers, u_int32_t num_values return(ret); } +/* *********************************************************** */ + +/* Check if the specified value is an outlier with respect to the past values */ +bool ndpi_is_outlier(u_int32_t 
*past_values, u_int32_t num_past_values, + u_int32_t value_to_check, float threshold, + float *lower, float *upper) { + struct ndpi_analyze_struct *data = ndpi_alloc_data_analysis_from_series(past_values, num_past_values); + float mean, stddev, v; + + if(!data) return(false); + + mean = ndpi_data_mean(data); + stddev = ndpi_data_stddev(data); + + /* The minimum threshold is 1 (i.e. the value of the stddev) */ + if(threshold < 1.) threshold = 1.; + + v = threshold * stddev; + *lower = mean - v, *upper = mean + v; + + ndpi_free_data_analysis(data, 1 /* free memory */); + + return(((value_to_check < *lower) || (value_to_check > *upper)) ? true : false); +} + /* ********************************************************************************* */ /* @@ -1690,7 +1715,7 @@ double ndpi_pearson_correlation(u_int32_t *values_a, u_int32_t *values_b, u_int1 double mean_a, mean_b, variance_a, variance_b, covariance; if(num_values == 0) return(0.0); - + for(i = 0; i < num_values; i++) sum_a += values_a[i], sum_b += values_b[i]; @@ -1703,11 +1728,12 @@ double ndpi_pearson_correlation(u_int32_t *values_a, u_int32_t *values_b, u_int1 variance_a = sum_squared_diff_a / (double)num_values, variance_b = sum_squared_diff_b / (double)num_values; covariance = sum_product_diff / (double)num_values; - + return(covariance / sqrt(variance_a * variance_b)); } /* ********************************************************************************* */ +/* ********************************************************************************* */ static const u_int16_t crc16_ccitt_table[256] = { 0x0000, 0x1189, 0x2312, 0x329B, 0x4624, 0x57AD, 0x6536, 0x74BF, @@ -1911,7 +1937,7 @@ struct ndpi_cm_sketch *ndpi_cm_sketch_init(u_int16_t num_hashes) { num_hashes = ndpi_nearest_power_of_two(num_hashes); sketch->num_hashes = num_hashes; - sketch->num_hash_buckets = num_hashes * NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS; + sketch->num_hash_buckets = num_hashes * NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS; sketch->num_hash_buckets = 
ndpi_nearest_power_of_two(sketch->num_hash_buckets)-1, len = num_hashes * NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS * sizeof(u_int32_t); @@ -1966,7 +1992,7 @@ u_int32_t ndpi_cm_sketch_count(struct ndpi_cm_sketch *sketch, u_int32_t element) printf("ndpi_add_sketch_add() [hash: %d][num_hash_buckets: %u][hashval: %d][value: %d]\n", idx, sketch->num_hash_buckets, hashval, sketch->tables[hashval]); #endif - + min_value = ndpi_min(min_value, sketch->tables[hashval]); } |