diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h.in | 15 | ||||
-rw-r--r-- | src/lib/ndpi_analyze.c | 42 |
2 files changed, 57 insertions, 0 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in index abd67655f..38cd7edc1 100644 --- a/src/include/ndpi_api.h.in +++ b/src/include/ndpi_api.h.in @@ -1558,6 +1558,7 @@ extern "C" { float ndpi_data_entropy(struct ndpi_analyze_struct *s); float ndpi_data_variance(struct ndpi_analyze_struct *s); float ndpi_data_stddev(struct ndpi_analyze_struct *s); + float ndpi_data_mean(struct ndpi_analyze_struct *s); u_int32_t ndpi_data_last(struct ndpi_analyze_struct *s); u_int32_t ndpi_data_min(struct ndpi_analyze_struct *s); u_int32_t ndpi_data_max(struct ndpi_analyze_struct *s); @@ -1653,6 +1654,20 @@ extern "C" { /* ******************************* */ + /* + * Finds outliers using Z-score + * Z-Score = (Value - Mean) / StdDev + * + * @par values = pointer to the individual values to be analyzed [in] + * @par outliers = pointer to a list of outliers identified [out] + * @par num_values = length of values and outliers that MUST have the same length [in] + * + * @return The number of outliers found + */ + u_int ndpi_find_outliers(u_int32_t *values, bool *outliers, u_int32_t num_values); + + /* ******************************* */ + u_int32_t ndpi_quick_16_byte_hash(u_int8_t *in_16_bytes_long); /* ******************************* */ diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c index beb6ca750..ebb5617ef 100644 --- a/src/lib/ndpi_analyze.c +++ b/src/lib/ndpi_analyze.c @@ -161,6 +161,16 @@ float ndpi_data_stddev(struct ndpi_analyze_struct *s) { /* ********************************************************************************* */ +/* + Compute the mean on all values + NOTE: In statistics, there is no difference between the mean and average +*/ +float ndpi_data_mean(struct ndpi_analyze_struct *s) { + return(ndpi_data_average(s)); +} + +/* ********************************************************************************* */ + /* Compute the average only on the sliding window */ float ndpi_data_window_average(struct ndpi_analyze_struct *s) { 
if(s->num_values_array_len) { @@ -1425,3 +1435,35 @@ void ndpi_des_fitting(double *values, u_int32_t num_values, float *ret_alpha, fl *ret_alpha = best_alpha, *ret_beta = best_beta; } + +/* *********************************************************** */ + +/* Z-Score = (Value - Mean) / StdDev */ +u_int ndpi_find_outliers(u_int32_t *values, bool *outliers, u_int32_t num_values) { + u_int i, ret = 0; + float mean, stddev, low_threshold = -2.5, high_threshold = 2.5; + struct ndpi_analyze_struct a; + + ndpi_init_data_analysis(&a, 3 /* this is the window so we do not need to store values and 3 is enough */); + + /* Add values */ + for(i=0; i<num_values; i++) + ndpi_data_add_value(&a, values[i]); + + mean = ndpi_data_mean(&a); + stddev = ndpi_data_stddev(&a); + + /* Process values */ + for(i=0; i<num_values; i++) { + float z_score = (((float)values[i]) - mean) / stddev; + bool is_outlier = ((z_score < low_threshold) || (z_score > high_threshold)) ? true : false; + + if(is_outlier) ret++; + outliers[i] = is_outlier; + } + + ndpi_free_data_analysis(&a, 0); + + return(ret); +} + |