/* * ndpi_analyze.c * * Copyright (C) 2019-22 - ntop.org * * This file is part of nDPI, an open source deep packet inspection * library. * * nDPI is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * nDPI is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with nDPI. If not, see . * */ #include #include #include #include #include #include /* FLT_EPSILON */ #include "ndpi_api.h" #include "ndpi_config.h" /* ********************************************************************************* */ void ndpi_init_data_analysis(struct ndpi_analyze_struct *ret, u_int16_t _max_series_len) { u_int32_t len; memset(ret, 0, sizeof(*ret)); if(_max_series_len > MAX_SERIES_LEN) _max_series_len = MAX_SERIES_LEN; ret->num_values_array_len = _max_series_len; if(ret->num_values_array_len > 0) { len = sizeof(u_int32_t) * ret->num_values_array_len; if((ret->values = ndpi_malloc(len)) != NULL) memset(ret->values, 0, len); else ret->num_values_array_len = 0; } } /* ********************************************************************************* */ struct ndpi_analyze_struct* ndpi_alloc_data_analysis(u_int16_t _max_series_len) { struct ndpi_analyze_struct *ret = ndpi_malloc(sizeof(struct ndpi_analyze_struct)); if(ret != NULL) ndpi_init_data_analysis(ret, _max_series_len); return(ret); } /* ********************************************************************************* */ void ndpi_free_data_analysis(struct ndpi_analyze_struct *d, u_int8_t free_pointer) { if(d && d->values) ndpi_free(d->values); if(free_pointer) ndpi_free(d); } /* ********************************************************************************* */ void ndpi_reset_data_analysis(struct ndpi_analyze_struct *d) { u_int32_t *values_bkp; u_int32_t num_values_array_len_bpk; if(!d) return; values_bkp = d->values; num_values_array_len_bpk = d->num_values_array_len; memset(d, 0, sizeof(struct ndpi_analyze_struct)); d->values = values_bkp; d->num_values_array_len = num_values_array_len_bpk; if(d->values) memset(d->values, 0, sizeof(u_int32_t)*d->num_values_array_len); } /* ********************************************************************************* */ /* Add a new point to analyze */ void ndpi_data_add_value(struct ndpi_analyze_struct *s, const u_int32_t value) { if(!s) return; if(s->sum_total == 0) s->min_val = s->max_val = value; else { if(value < s->min_val) s->min_val = value; if(value > s->max_val) s->max_val = value; } s->sum_total += value, s->num_data_entries++; if(s->num_values_array_len) { s->values[s->next_value_insert_index] = value; if(++s->next_value_insert_index == s->num_values_array_len) s->next_value_insert_index = 0; } /* Optimized stddev calculation https://www.khanacademy.org/math/probability/data-distributions-a1/summarizing-spread-distributions/a/calculating-standard-deviation-step-by-step https://math.stackexchange.com/questions/683297/how-to-calculate-standard-deviation-without-detailed-historical-data http://mathcentral.uregina.ca/QQ/database/QQ.09.02/carlos1.html */ s->stddev.sum_square_total += (u_int64_t)value * (u_int64_t)value; } /* ********************************************************************************* */ /* Compute the average on all values */ float ndpi_data_average(struct ndpi_analyze_struct *s) { if(!s) return(0); return((s->num_data_entries == 0) ? 0 : ((float)s->sum_total / (float)s->num_data_entries)); } /* ********************************************************************************* */ u_int32_t ndpi_data_last(struct ndpi_analyze_struct *s) { if((!s) || (s->num_data_entries == 0) || (s->sum_total == 0)) return(0); if(s->next_value_insert_index == 0) return(s->values[s->num_values_array_len-1]); else return(s->values[s->next_value_insert_index-1]); } /* Return min/max on all values */ u_int32_t ndpi_data_min(struct ndpi_analyze_struct *s) { return(s->min_val); } u_int32_t ndpi_data_max(struct ndpi_analyze_struct *s) { return(s->max_val); } /* ********************************************************************************* */ /* Compute the variance on all values */ float ndpi_data_variance(struct ndpi_analyze_struct *s) { if(!s) return(0); float v = s->num_data_entries ? ((float)s->stddev.sum_square_total - ((float)s->sum_total * (float)s->sum_total / (float)s->num_data_entries)) / (float)s->num_data_entries : 0.0; return((v < 0 /* rounding problem */) ? 0 : v); } /* ********************************************************************************* */ /* See the link below for "Population and sample standard deviation review" https://www.khanacademy.org/math/statistics-probability/summarizing-quantitative-data/variance-standard-deviation-sample/a/population-and-sample-standard-deviation-review In nDPI we use an approximate stddev calculation to avoid storing all data in memory */ /* Compute the standard deviation on all values */ float ndpi_data_stddev(struct ndpi_analyze_struct *s) { return(sqrt(ndpi_data_variance(s))); } /* ********************************************************************************* */ /* Compute the mean on all values NOTE: In statistics, there is no difference between the mean and average */ float ndpi_data_mean(struct ndpi_analyze_struct *s) { return(ndpi_data_average(s)); } /* ********************************************************************************* */ /* Compute the average only on the sliding window */ float ndpi_data_window_average(struct ndpi_analyze_struct *s) { if(s && s->num_values_array_len) { float sum = 0.0; u_int16_t i, n = ndpi_min(s->num_data_entries, s->num_values_array_len); if(n == 0) return(0); for(i=0; ivalues[i]; return((float)sum / (float)n); } else return(0); } /* ********************************************************************************* */ /* Compute the variance only on the sliding window */ float ndpi_data_window_variance(struct ndpi_analyze_struct *s) { if(s && s->num_values_array_len) { float sum = 0.0, avg = ndpi_data_window_average(s); u_int16_t i, n = ndpi_min(s->num_data_entries, s->num_values_array_len); if(n == 0) return(0); for(i=0; ivalues[i]-avg, 2); return((float)sum / (float)n); } else return(0); } /* ********************************************************************************* */ /* Compute the variance only on the sliding window */ float ndpi_data_window_stddev(struct ndpi_analyze_struct *s) { return(sqrt(ndpi_data_window_variance(s))); } /* ********************************************************************************* */ /* Compute entropy on the last sliding window values */ float ndpi_data_entropy(struct ndpi_analyze_struct *s) { if(s && s->num_values_array_len) { int i; float sum = 0.0, total = 0.0; for(i=0; inum_values_array_len; i++) total += s->values[i]; if(fpclassify(total) == FP_ZERO) return(0); for (i=0; inum_values_array_len; i++) { float tmp = (float)s->values[i] / (float)total; if(tmp > FLT_EPSILON) sum -= tmp * logf(tmp); } return(sum / logf(2.0)); } else return(0); } /* ********************************************************************************* */ void ndpi_data_print_window_values(struct ndpi_analyze_struct *s) { if(s && s->num_values_array_len) { u_int16_t i, n = ndpi_min(s->num_data_entries, s->num_values_array_len); for(i=0; ivalues[i]); printf("\n"); } } /* ********************************************************************************* */ /* Upload / download ration -1 Download 0 Mixed 1 Upload */ float ndpi_data_ratio(u_int32_t sent, u_int32_t rcvd) { float s = (float)((int64_t)sent + (int64_t)rcvd); float d = (float)((int64_t)sent - (int64_t)rcvd); return((s == 0) ? 0 : (d/s)); } /* ********************************************************************************* */ const char* ndpi_data_ratio2str(float ratio) { if(ratio < -0.2) return("Download"); else if(ratio > 0.2) return("Upload"); else return("Mixed"); } /* ********************************************************************************* */ /* ********************************************************************************* */ #include "third_party/src/hll/hll.c" #include "third_party/src/hll/MurmurHash3.c" int ndpi_hll_init(struct ndpi_hll *hll, u_int8_t bits) { return(hll_init(hll, bits)); } void ndpi_hll_destroy(struct ndpi_hll *hll) { hll_destroy(hll); } void ndpi_hll_reset(struct ndpi_hll *hll) { hll_reset(hll); } int ndpi_hll_add(struct ndpi_hll *hll, const char *data, size_t data_len) { return(hll_add(hll, (const void *)data, data_len)); } /* 1 = rank changed, 0 = no changes in rank */ int ndpi_hll_add_number(struct ndpi_hll *hll, u_int32_t value) { return(hll_add(hll, (const void *)&value, sizeof(value))); } double ndpi_hll_count(struct ndpi_hll *hll) { return(hll_count(hll)); } /* ********************************************************************************* */ /* ********************************************************************************* */ int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int16_t num_bins) { if(!b) return(-1); b->num_bins = num_bins, b->family = f, b->is_empty = 1; switch(f) { case ndpi_bin_family8: if((b->u.bins8 = (u_int8_t*)ndpi_calloc(num_bins, sizeof(u_int8_t))) == NULL) return(-1); break; case ndpi_bin_family16: if((b->u.bins16 = (u_int16_t*)ndpi_calloc(num_bins, sizeof(u_int16_t))) == NULL) return(-1); break; case ndpi_bin_family32: if((b->u.bins32 = (u_int32_t*)ndpi_calloc(num_bins, sizeof(u_int32_t))) == NULL) return(-1); break; case ndpi_bin_family64: if((b->u.bins64 = (u_int64_t*)ndpi_calloc(num_bins, sizeof(u_int64_t))) == NULL) return(-1); break; } return(0); } /* ********************************************************************************* */ void ndpi_free_bin(struct ndpi_bin *b) { if(!b || !b->u.bins8) return; switch(b->family) { case ndpi_bin_family8: ndpi_free(b->u.bins8); break; case ndpi_bin_family16: ndpi_free(b->u.bins16); break; case ndpi_bin_family32: ndpi_free(b->u.bins32); break; case ndpi_bin_family64: ndpi_free(b->u.bins64); break; } } /* ********************************************************************************* */ struct ndpi_bin* ndpi_clone_bin(struct ndpi_bin *b) { struct ndpi_bin *out; if(!b || !b->u.bins8) return(NULL); out = (struct ndpi_bin*)ndpi_malloc(sizeof(struct ndpi_bin)); if(!out) return(NULL); out->num_bins = b->num_bins, out->family = b->family, out->is_empty = b->is_empty; switch(out->family) { case ndpi_bin_family8: if((out->u.bins8 = (u_int8_t*)ndpi_calloc(out->num_bins, sizeof(u_int8_t))) == NULL) { ndpi_free(out); return(NULL); } else memcpy(out->u.bins8, b->u.bins8, out->num_bins*sizeof(u_int8_t)); break; case ndpi_bin_family16: if((out->u.bins16 = (u_int16_t*)ndpi_calloc(out->num_bins, sizeof(u_int16_t))) == NULL) { ndpi_free(out); return(NULL); } else memcpy(out->u.bins16, b->u.bins16, out->num_bins*sizeof(u_int16_t)); break; case ndpi_bin_family32: if((out->u.bins32 = (u_int32_t*)ndpi_calloc(out->num_bins, sizeof(u_int32_t))) == NULL) { ndpi_free(out); return(NULL); } else memcpy(out->u.bins32, b->u.bins32, out->num_bins*sizeof(u_int32_t)); break; case ndpi_bin_family64: if((out->u.bins64 = (u_int64_t*)ndpi_calloc(out->num_bins, sizeof(u_int64_t))) == NULL) { ndpi_free(out); return(NULL); } else memcpy(out->u.bins64, b->u.bins64, out->num_bins*sizeof(u_int64_t)); break; } return(out); } /* ********************************************************************************* */ void ndpi_set_bin(struct ndpi_bin *b, u_int16_t slot_id, u_int64_t val) { if(!b || !b->u.bins8 || b->num_bins == 0) return; if(slot_id >= b->num_bins) slot_id = 0; switch(b->family) { case ndpi_bin_family8: b->u.bins8[slot_id] = (u_int8_t)val; break; case ndpi_bin_family16: b->u.bins16[slot_id] = (u_int16_t)val; break; case ndpi_bin_family32: b->u.bins32[slot_id] = (u_int32_t)val; break; case ndpi_bin_family64: b->u.bins64[slot_id] = (u_int64_t)val; break; } } /* ********************************************************************************* */ void ndpi_inc_bin(struct ndpi_bin *b, u_int16_t slot_id, u_int64_t val) { if(!b || !b->u.bins8 || b->num_bins == 0) return; b->is_empty = 0; if(slot_id >= b->num_bins) slot_id = 0; switch(b->family) { case ndpi_bin_family8: b->u.bins8[slot_id] += (u_int8_t)val; break; case ndpi_bin_family16: b->u.bins16[slot_id] += (u_int16_t)val; break; case ndpi_bin_family32: b->u.bins32[slot_id] += (u_int32_t)val; break; case ndpi_bin_family64: b->u.bins64[slot_id] += (u_int64_t)val; break; } } /* ********************************************************************************* */ u_int64_t ndpi_get_bin_value(struct ndpi_bin *b, u_int16_t slot_id) { if(!b || !b->u.bins8 || b->num_bins == 0) return(0); if(slot_id >= b->num_bins) slot_id = 0; switch(b->family) { case ndpi_bin_family8: return(b->u.bins8[slot_id]); break; case ndpi_bin_family16: return(b->u.bins16[slot_id]); break; case ndpi_bin_family32: return(b->u.bins32[slot_id]); break; case ndpi_bin_family64: return(b->u.bins64[slot_id]); break; } return(0); } /* ********************************************************************************* */ void ndpi_reset_bin(struct ndpi_bin *b) { if(!b || !b->u.bins8) return; b->is_empty = 1; switch(b->family) { case ndpi_bin_family8: memset(b->u.bins8, 0, sizeof(u_int8_t)*b->num_bins); break; case ndpi_bin_family16: memset(b->u.bins16, 0, sizeof(u_int16_t)*b->num_bins); break; case ndpi_bin_family32: memset(b->u.bins32, 0, sizeof(u_int32_t)*b->num_bins); break; case ndpi_bin_family64: memset(b->u.bins64, 0, sizeof(u_int64_t)*b->num_bins); break; } } /* ********************************************************************************* */ /* Each bin slot is transformed in a % with respect to the value total */ void ndpi_normalize_bin(struct ndpi_bin *b) { u_int16_t i; u_int32_t tot = 0; if(!b || b->is_empty) return; switch(b->family) { case ndpi_bin_family8: for(i=0; inum_bins; i++) tot += b->u.bins8[i]; if(tot > 0) { for(i=0; inum_bins; i++) b->u.bins8[i] = (b->u.bins8[i]*100) / tot; } break; case ndpi_bin_family16: for(i=0; inum_bins; i++) tot += b->u.bins16[i]; if(tot > 0) { for(i=0; inum_bins; i++) b->u.bins16[i] = (b->u.bins16[i]*100) / tot; } break; case ndpi_bin_family32: for(i=0; inum_bins; i++) tot += b->u.bins32[i]; if(tot > 0) { for(i=0; inum_bins; i++) b->u.bins32[i] = (b->u.bins32[i]*100) / tot; } break; case ndpi_bin_family64: for(i=0; inum_bins; i++) tot += b->u.bins64[i]; if(tot > 0) { for(i=0; inum_bins; i++) b->u.bins64[i] = (b->u.bins64[i]*100) / tot; } break; } } /* ********************************************************************************* */ char* ndpi_print_bin(struct ndpi_bin *b, u_int8_t normalize_first, char *out_buf, u_int out_buf_len) { u_int16_t i; u_int len = 0; if(!b || !out_buf) return(out_buf); else out_buf[0] = '\0'; if(normalize_first) ndpi_normalize_bin(b); switch(b->family) { case ndpi_bin_family8: for(i=0; inum_bins; i++) { int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%u", (i > 0) ? "," : "", b->u.bins8[i]); if(rc < 0) break; len += rc; } break; case ndpi_bin_family16: for(i=0; inum_bins; i++) { int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%u", (i > 0) ? "," : "", b->u.bins16[i]); if(rc < 0) break; len += rc; } break; case ndpi_bin_family32: for(i=0; inum_bins; i++) { int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%u", (i > 0) ? "," : "", b->u.bins32[i]); if(rc < 0) break; len += rc; } break; case ndpi_bin_family64: for(i=0; inum_bins; i++) { int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%llu", (i > 0) ? "," : "", (unsigned long long)b->u.bins64[i]); if(rc < 0) break; len += rc; } break; } return(out_buf); } /* ********************************************************************************* */ // #define COSINE_SIMILARITY /* Determines how similar are two bins Cosine Similiarity 0 = Very differet ... (gray zone) 1 = Alike See https://en.wikipedia.org/wiki/Cosine_similarity for more details --- Euclidean similarity 0 = alike ... the higher the more different if similarity_max_threshold != 0, we assume that bins arent similar */ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first, float similarity_max_threshold) { u_int16_t i; float threshold = similarity_max_threshold*similarity_max_threshold; if( // (b1->family != b2->family) || (b1->num_bins != b2->num_bins)) return(-1); if(normalize_first) ndpi_normalize_bin(b1), ndpi_normalize_bin(b2); #ifdef COSINE_SIMILARITY { u_int32_t sumxx = 0, sumxy = 0, sumyy = 0; for(i=0; inum_bins; i++) { u_int32_t a = ndpi_get_bin_value(b1, i); u_int32_t b = ndpi_get_bin_value(b2, i); sumxx += a*a, sumyy += b*b, sumxy += a*b; } if((sumxx == 0) || (sumyy == 0)) return(0); else return((float)sumxy / sqrt((float)(sumxx * sumyy))); } #else { double sum = 0; for(i=0; inum_bins; i++) { u_int32_t a = ndpi_get_bin_value(b1, i); u_int32_t b = ndpi_get_bin_value(b2, i); u_int32_t diff = (a > b) ? (a - b) : (b - a); if(a != b) sum += pow(diff, 2); if(threshold && (sum > threshold)) return(-2); /* Sorry they are not similar */ // printf("%u/%u) [a: %u][b: %u][sum: %u]\n", i, b1->num_bins, a, b, sum); } /* The lower the more similar */ return(sqrt(sum)); } #endif } /* ********************************************************************************* */ #define MAX_NUM_CLUSTERS 128 /* Clusters bins into 'num_clusters' - (in) bins: a vection 'num_bins' long of bins to cluster - (in) 'num_clusters': number of desired clusters 0...(num_clusters-1) - (out) 'cluster_ids': a vector 'num_bins' long containing the id's of each clustered bin - (out) 'centroids': an optional 'num_clusters' long vector of (centroid) bins See - https://en.wikipedia.org/wiki/K-means_clustering */ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins, u_int8_t num_clusters, u_int16_t *cluster_ids, struct ndpi_bin *centroids) { u_int16_t i, j, max_iterations = 25, num_iterations, num_moves; u_int8_t verbose = 0, alloc_centroids = 0; char out_buf[256]; float *bin_score; u_int16_t num_cluster_elems[MAX_NUM_CLUSTERS] = { 0 }; srand(time(NULL)); if(!bins || num_bins == 0 || !cluster_ids || num_clusters == 0) return(-1); if(num_clusters > num_bins) num_clusters = num_bins; if(num_clusters > MAX_NUM_CLUSTERS) num_clusters = MAX_NUM_CLUSTERS; if(verbose) printf("Distributing %u bins over %u clusters\n", num_bins, num_clusters); if((bin_score = (float*)ndpi_calloc(num_bins, sizeof(float))) == NULL) return(-2); if(centroids == NULL) { alloc_centroids = 1; if((centroids = (struct ndpi_bin*)ndpi_malloc(sizeof(struct ndpi_bin)*num_clusters)) == NULL) { ndpi_free(bin_score); return(-2); } else { for(i=0; i best_similarity) { cluster_id = j, best_similarity = similarity; } #else if(similarity < best_similarity) { cluster_id = j, best_similarity = similarity; } #endif } if((best_similarity == current_similarity) && (num_cluster_elems[cluster_ids[i]] > 1)) { /* In case of identical similarity let's leave things as they are this unless this is a cluster with only one element */ cluster_id = cluster_ids[i]; } bin_score[i] = best_similarity; if(cluster_ids[i] != cluster_id) { if(verbose) printf("Moved bin %u from cluster %u -> %u [similarity: %f]\n", i, cluster_ids[i], cluster_id, best_similarity); num_cluster_elems[cluster_ids[i]]--; num_cluster_elems[cluster_id]++; cluster_ids[i] = cluster_id; num_moves++; } } if(num_moves == 0) break; if(verbose) { for(j=0; j 1)) score = bin_score[i], candidate = i; } #else score = 0; for(i=0; i score) && (num_cluster_elems[cluster_ids[i]] > 1)) score = bin_score[i], candidate = i; } #endif if(verbose) printf("Rebalance: moving bin %u from cluster %u -> %u [similarity: %f]\n", candidate, cluster_ids[candidate], j, score); num_cluster_elems[cluster_ids[candidate]]--; num_cluster_elems[j]++; cluster_ids[candidate] = j; } } #endif } /* while(...) */ if(alloc_centroids) { for(i=0; iempty = 1, s->num_values = num_learning_values; s->gains = (u_int32_t*)ndpi_calloc(num_learning_values, sizeof(u_int32_t)); s->losses = (u_int32_t*)ndpi_calloc(num_learning_values, sizeof(u_int32_t)); if(s->gains && s->losses) { s->last_value = 0; return(0); } else { if(s->gains) ndpi_free(s->gains); if(s->losses) ndpi_free(s->losses); return(-1); } } /* ************************************* */ void ndpi_free_rsi(struct ndpi_rsi_struct *s) { ndpi_free(s->gains), ndpi_free(s->losses); } /* ************************************* */ // #define DEBUG_RSI /* This function adds a new value and returns the computed RSI, or -1 if there are too few points (< num_learning_values) RSI < 30 (too many losses) RSI > 70 (too many gains) */ float ndpi_rsi_add_value(struct ndpi_rsi_struct *s, const u_int32_t value) { float relative_strength; if(!s->empty) { u_int32_t val; s->total_gains -= s->gains[s->next_index], s->total_losses -= s->losses[s->next_index]; if(value > s->last_value) { val = value - s->last_value; s->gains[s->next_index] = val, s->losses[s->next_index] = 0; s->total_gains += val; #ifdef DEBUG_RSI printf("Gain: %u\n", val); #endif } else { val = s->last_value - value; s->losses[s->next_index] = val, s->gains[s->next_index] = 0; s->total_losses += val; #ifdef DEBUG_RSI printf("Loss: %u\n", val); #endif } #ifdef DEBUG_RSI printf("[value: %u][total_gains: %u][total_losses: %u][cur_idx: %u]\n", value, s->total_gains, s->total_losses, s->next_index); #endif } s->last_value = value, s->next_index = (s->next_index + 1) % s->num_values, s->empty = 0; if(s->next_index == 0) s->rsi_ready = 1; /* We have completed one round */ if(!s->rsi_ready) return(-1); /* Too early */ else if(s->total_losses == 0) /* Avoid division by zero (**) */ return(100.); else { relative_strength = (float)s->total_gains / (float)s->total_losses; /* (**) */ #ifdef DEBUG_RSI printf("RSI: %f\n", relative_strength); #endif return(100. - (100. / (1. + relative_strength))); } } /* *********************************************************** */ /* https://www.johndcook.com/blog/cpp_phi_inverse/ */ static double ndpi_rational_approximation(double t) { // Abramowitz and Stegun formula 26.2.23. // The absolute value of the error should be less than 4.5 e-4. double c[] = { 2.515517, 0.802853, 0.010328 }; double d[] = { 1.432788, 0.189269, 0.001308 }; return(t - ((c[2]*t + c[1])*t + c[0]) / (((d[2]*t + d[1])*t + d[0])*t + 1.0)); } static double ndpi_normal_cdf_inverse(double p) { if(p <= 0.0 || p >= 1.0) return(0); /* Invalid argument: valid range 0 < X < 1 */ if(p < 0.5) { // F^-1(p) = - G^-1(p) return -ndpi_rational_approximation( sqrt(-2.0*log(p)) ); } else { // F^-1(p) = G^-1(1-p) return ndpi_rational_approximation( sqrt(-2.0*log(1-p)) ); } } double ndpi_avg_inline(u_int64_t *v, u_int num) { double avg = 0; u_int i; for(i=0; iparams.num_season_periods = num_periods + 1; hw->params.alpha = alpha; hw->params.beta = beta; hw->params.gamma = gamma; hw->params.use_hw_additive_seasonal = additive_seeasonal; if((significance < 0) || (significance > 1)) significance = 0.05; hw->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.)); if((hw->y = (u_int64_t*)ndpi_calloc(hw->params.num_season_periods, sizeof(u_int64_t))) == NULL) return(-1); if((hw->s = (double*)ndpi_calloc(hw->params.num_season_periods, sizeof(double))) == NULL) { ndpi_free(hw->y); return(-1); } return(0); } /* *********************************************************** */ /* Frees the memory allocated by ndpi_hw_init() */ void ndpi_hw_free(struct ndpi_hw_struct *hw) { if(hw->y) ndpi_free(hw->y); if(hw->s) ndpi_free(hw->s); } /* *********************************************************** */ /* Returns the forecast and the band (forecast +/- band are the upper and lower values) Input hw: Datastructure previously initialized value The value to add to the measurement Output forecast The forecasted value confidence_band The value +/- on which the value should fall is not an anomaly Return code 0 Too early: we're still in the learning phase. Output values are zero. 1 Normal processing: forecast and confidence_band are meaningful */ int ndpi_hw_add_value(struct ndpi_hw_struct *hw, const u_int64_t _value, double *forecast, double *confidence_band) { if(hw->num_values < hw->params.num_season_periods) { hw->y[hw->num_values++] = _value; *forecast = 0; *confidence_band = 0; return(0); /* Too early still training... */ } else { u_int idx = hw->num_values % hw->params.num_season_periods; double prev_u, prev_v, prev_s, value = (double)_value; double sq, error, sq_error; u_int observations; if(hw->num_values == hw->params.num_season_periods) { double avg = ndpi_avg_inline(hw->y, hw->params.num_season_periods); u_int i; if(avg == 0) avg = 1; /* Avoid divisions by zero */ for(i=0; iparams.num_season_periods; i++) hw->s[i] = hw->y[i] / avg; i = hw->params.num_season_periods-1; if(hw->s[i] == 0) hw->u = 0; else hw->u = _value / hw->s[i]; hw->v = 0; ndpi_free(hw->y); hw->y = NULL; } idx = hw->num_values % hw->params.num_season_periods; prev_u = hw->u, prev_v = hw->v, prev_s = hw->s[idx]; if(prev_s != 0) hw->u = ((hw->params.alpha * value) / prev_s) + ( 1 - hw->params.alpha) * (hw->u + hw->v); else hw->u = 0; /* Avoid divisions by zero */ hw->v = (hw->params.beta * (hw->u - prev_u)) + ((1 - hw->params.beta ) * hw->v); if(hw->u != 0) hw->s[idx] = (hw->params.gamma * (value / hw->u)) + ((1 - hw->params.gamma) * prev_s); else hw->s[idx] = 0; /* Avoid divisions by zero */ if(hw->params.use_hw_additive_seasonal) *forecast = (prev_u + prev_v) + prev_s; else *forecast = (prev_u + prev_v) * prev_s; error = value - *forecast; sq_error = error * error; hw->sum_square_error += sq_error, hw->prev_error.sum_square_error += sq_error; observations = (hw->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? hw->num_values : ((hw->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1); sq = sqrt(hw->sum_square_error / observations); *confidence_band = hw->params.ro * sq; #ifdef HW_DEBUG printf("[num_values: %u][u: %.3f][v: %.3f][s: %.3f][error: %.3f][forecast: %.3f][sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n", hw->num_values, hw->u, hw->v, hw->s[idx], error, *forecast, hw->sum_square_error, sq, *confidence_band); #endif hw->num_values++, idx = (idx + 1) % hw->params.num_season_periods; if(++hw->prev_error.num_values_rollup == MAX_SQUARE_ERROR_ITERATIONS) { hw->sum_square_error = hw->prev_error.sum_square_error; hw->prev_error.num_values_rollup = 0, hw->prev_error.sum_square_error = 0; } return(1); /* We're in business: forecast is meaningful now */ } } /* ********************************************************************************* */ /* ********************************************************************************* */ /* Jitter calculator Used to determine how noisy is a signal */ int ndpi_jitter_init(struct ndpi_jitter_struct *s, u_int16_t num_learning_values) { if(!s) return(-1); memset(s, 0, sizeof(struct ndpi_jitter_struct)); if(num_learning_values < 2) num_learning_values = 2; s->empty = 1, s->num_values = num_learning_values; s->observations = (float*)ndpi_calloc(num_learning_values, sizeof(float)); if(s->observations) { s->last_value = 0; return(0); } else return(-1); } /* ************************************* */ void ndpi_jitter_free(struct ndpi_jitter_struct *s) { ndpi_free(s->observations); } /* ************************************* */ /* This function adds a new value and returns the computed Jitter */ float ndpi_jitter_add_value(struct ndpi_jitter_struct *s, const float value) { float val = fabsf(value - s->last_value); if(s->empty && (s->next_index == 0)) ; /* Skip the first value as we are unable to calculate the difference */ else { s->jitter_total -= s->observations[s->next_index]; s->observations[s->next_index] = val; s->jitter_total += val; } s->last_value = value, s->next_index = (s->next_index + 1) % s->num_values; if(s->next_index == 0) s->jitter_ready = 1; /* We have completed one round */ #ifdef DEBUG_JITTER printf("[JITTER] [value: %.3f][diff: %.3f][jitter_total: %.3f] -> %.3f\n", value, val, s->jitter_total, s->jitter_ready ? (s->jitter_total / s->num_values) : -1); #endif if(!s->jitter_ready) return(-1); /* Too early */ else return(s->jitter_total / s->num_values); } /* *********************************************************** */ /* *********************************************************** */ /* Single Exponential Smoothing */ int ndpi_ses_init(struct ndpi_ses_struct *ses, double alpha, float significance) { if(!ses) return(-1); memset(ses, 0, sizeof(struct ndpi_ses_struct)); ses->params.alpha = alpha; if((significance < 0) || (significance > 1)) significance = 0.05; ses->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.)); return(0); } /* *********************************************************** */ /* Returns the forecast and the band (forecast +/- band are the upper and lower values) Input ses: Datastructure previously initialized value The value to add to the measurement Output forecast The forecasted value confidence_band The value +/- on which the value should fall is not an anomaly Return code 0 Too early: we're still in the learning phase. Output values are zero. 1 Normal processing: forecast and confidence_band are meaningful */ int ndpi_ses_add_value(struct ndpi_ses_struct *ses, const double _value, double *forecast, double *confidence_band) { double value = (double)_value, error, sq_error; int rc; if(ses->num_values == 0) *forecast = value; else *forecast = (ses->params.alpha * (ses->last_value - ses->last_forecast)) + ses->last_forecast; error = value - *forecast; sq_error = error * error; ses->sum_square_error += sq_error, ses->prev_error.sum_square_error += sq_error; if(ses->num_values > 0) { u_int observations = (ses->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? (ses->num_values + 1) : ((ses->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1); double sq = sqrt(ses->sum_square_error / observations); *confidence_band = ses->params.ro * sq; rc = 1; } else *confidence_band = 0, rc = 0; ses->num_values++, ses->last_value = value, ses->last_forecast = *forecast; if(++ses->prev_error.num_values_rollup == MAX_SQUARE_ERROR_ITERATIONS) { ses->sum_square_error = ses->prev_error.sum_square_error; ses->prev_error.num_values_rollup = 0, ses->prev_error.sum_square_error = 0; } #ifdef SES_DEBUG printf("[num_values: %u][[error: %.3f][forecast: %.3f][sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n", ses->num_values, error, *forecast, ses->sum_square_error, sq_error, *confidence_band); #endif return(rc); } /* *********************************************************** */ /* Computes the best alpha value using the specified values used for training */ void ndpi_ses_fitting(double *values, u_int32_t num_values, float *ret_alpha) { u_int i; float alpha, best_alpha; double sse, lowest_sse; int trace = 0; if(!values || num_values == 0) { *ret_alpha = 0; return; } lowest_sse = 0, best_alpha = 0; for(alpha=0.1; alpha<0.99; alpha += 0.05) { struct ndpi_ses_struct ses; ndpi_ses_init(&ses, alpha, 0.05); if(trace) printf("\nDouble Exponential Smoothing [alpha: %.2f]\n", alpha); sse = 0; for(i=0; iparams.alpha = alpha; des->params.beta = beta; if((significance < 0) || (significance > 1)) significance = 0.05; des->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.)); return(0); } /* *********************************************************** */ /* Returns the forecast and the band (forecast +/- band are the upper and lower values) Input des: Datastructure previously initialized value The value to add to the measurement Output forecast The forecasted value confidence_band The value +/- on which the value should fall is not an anomaly Return code 0 Too early: we're still in the learning phase. Output values are zero. 1 Normal processing: forecast and confidence_band are meaningful */ int ndpi_des_add_value(struct ndpi_des_struct *des, const double _value, double *forecast, double *confidence_band) { double value = (double)_value, error, sq_error; int rc; if(des->num_values == 0) *forecast = value, des->last_trend = 0; else { *forecast = (des->params.alpha * value) + ((1 - des->params.alpha) * (des->last_forecast + des->last_trend)); des->last_trend = (des->params.beta * (*forecast - des->last_forecast)) + ((1 - des->params.beta) * des->last_trend); } error = value - *forecast; sq_error = error * error; des->sum_square_error += sq_error, des->prev_error.sum_square_error += sq_error; if(des->num_values > 0) { u_int observations = (des->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? (des->num_values + 1) : ((des->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1); double sq = sqrt(des->sum_square_error / observations); *confidence_band = des->params.ro * sq; rc = 1; } else *confidence_band = 0, rc = 0; des->num_values++, des->last_value = value, des->last_forecast = *forecast; if(++des->prev_error.num_values_rollup == MAX_SQUARE_ERROR_ITERATIONS) { des->sum_square_error = des->prev_error.sum_square_error; des->prev_error.num_values_rollup = 0, des->prev_error.sum_square_error = 0; } #ifdef DES_DEBUG printf("[num_values: %u][[error: %.3f][forecast: %.3f][trend: %.3f[sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n", des->num_values, error, *forecast, des->last_trend, des->sum_square_error, sq, *confidence_band); #endif return(rc); } /* *********************************************************** */ /* Computes the best alpha and beta values using the specified values used for training */ void ndpi_des_fitting(double *values, u_int32_t num_values, float *ret_alpha, float *ret_beta) { u_int i; float alpha, best_alpha, best_beta, beta = 0; double sse, lowest_sse; int trace = 0; if(!values || num_values == 0) { *ret_alpha = 0; *ret_beta = 0; return; } lowest_sse = 0, best_alpha = 0, best_beta = 0; for(beta=0.1; beta<0.99; beta += 0.05) { for(alpha=0.1; alpha<0.99; alpha += 0.05) { struct ndpi_des_struct des; ndpi_des_init(&des, alpha, beta, 0.05); if(trace) printf("\nDouble Exponential Smoothing [alpha: %.2f][beta: %.2f]\n", alpha, beta); sse = 0; for(i=0; i high_threshold)) ? true : false; if(is_outlier) ret++; outliers[i] = is_outlier; } ndpi_free_data_analysis(&a, 0); return(ret); }