diff options
author | Luca Deri <deri@ntop.org> | 2021-02-08 19:10:25 +0100 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2021-02-08 19:10:25 +0100 |
commit | 732579b72b1e58de58502520fc678e87ea757677 (patch) | |
tree | 93eec0adb9b3b6d6d5d8d805aefc7107d8ff350d /src/lib/ndpi_analyze.c | |
parent | d9da752aa8e6cc2b6491a9f0b55723ad0241eeb8 (diff) |
Added timeseries forecasting support implementing Holt-Winters with confidence interval
New API calls added
- ndpi_hw_init()
- ndpi_hw_add_value()
- ndpi_hw_free()
Diffstat (limited to 'src/lib/ndpi_analyze.c')
-rw-r--r-- | src/lib/ndpi_analyze.c | 178 |
1 files changed, 167 insertions, 11 deletions
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c index 1ee6e99b2..38f74bc0b 100644 --- a/src/lib/ndpi_analyze.c +++ b/src/lib/ndpi_analyze.c @@ -105,7 +105,7 @@ void ndpi_data_add_value(struct ndpi_analyze_struct *s, const u_int32_t value) { /* Optimized stddev calculation - + https://www.khanacademy.org/math/probability/data-distributions-a1/summarizing-spread-distributions/a/calculating-standard-deviation-step-by-step https://math.stackexchange.com/questions/683297/how-to-calculate-standard-deviation-without-detailed-historical-data http://mathcentral.uregina.ca/QQ/database/QQ.09.02/carlos1.html @@ -140,7 +140,7 @@ u_int32_t ndpi_data_max(struct ndpi_analyze_struct *s) { return(s->max_val); } /* Compute the variance on all values */ float ndpi_data_variance(struct ndpi_analyze_struct *s) { - return(s->num_data_entries ? ((float)s->stddev.sum_square_total - ((float)s->sum_total * (float)s->sum_total / (float)s->num_data_entries)) / (float)s->num_data_entries : 0.0); + return(s->num_data_entries ? ((float)s->stddev.sum_square_total - ((float)s->sum_total * (float)s->sum_total / (float)s->num_data_entries)) / (float)s->num_data_entries : 0.0); } /* ********************************************************************************* */ @@ -468,7 +468,7 @@ void ndpi_normalize_bin(struct ndpi_bin *b) { b->u.bins8[i] = (b->u.bins8[i]*100) / tot; } break; - + case ndpi_bin_family16: for(i=0; i<b->num_bins; i++) tot += b->u.bins16[i]; @@ -588,7 +588,7 @@ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t nor u_int32_t a = ndpi_get_bin_value(b1, i); u_int32_t b = ndpi_get_bin_value(b2, i); u_int32_t diff = (a > b) ? (a - b) : (b - a); - + if(a != b) sum += pow(diff, 2); // printf("[a: %u][b: %u][sum: %u]\n", a, b, sum); @@ -721,7 +721,7 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins, if(j == cluster_ids[i]) current_similarity = similarity; - + if(verbose) printf("Bin %u / centroid %u [similarity: %f]\n", i, j, similarity); @@ -743,7 +743,7 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins, */ cluster_id = cluster_ids[i]; } - + bin_score[i] = best_similarity; if(cluster_ids[i] != cluster_id) { @@ -818,7 +818,7 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins, /* ********************************************************************************* */ -/* +/* RSI (Relative Strength Index) RSI = 100 − [ 100/ (1 + (Average gain/Average loss)) ] @@ -856,12 +856,12 @@ void ndpi_free_rsi(struct ndpi_rsi_struct *s) { */ float ndpi_rsi_add_value(struct ndpi_rsi_struct *s, const u_int32_t value) { float relative_strength; - + if(!s->empty) { u_int32_t val; s->total_gains -= s->gains[s->next_index], s->total_losses -= s->losses[s->next_index]; - + if(value > s->last_value) { val = value - s->last_value; s->gains[s->next_index] = val, s->losses[s->next_index] = 0; @@ -882,10 +882,10 @@ float ndpi_rsi_add_value(struct ndpi_rsi_struct *s, const u_int32_t value) { printf("[value: %u][total_gains: %u][total_losses: %u][cur_idx: %u]\n", value, s->total_gains, s->total_losses, s->next_index); #endif } - + s->last_value = value, s->next_index = (s->next_index + 1) % s->num_values, s->empty = 0; if(s->next_index == 0) s->rsi_ready = 1; /* We have completed one round */ - + if(!s->rsi_ready) return(0); else if(s->total_losses == 0) /* Avoid division by zero (**) */ @@ -898,3 +898,159 @@ float ndpi_rsi_add_value(struct ndpi_rsi_struct *s, const u_int32_t value) { return(100. - (100. / (1. + relative_strength))); } } + +/* *********************************************************** */ + +/* https://www.johndcook.com/blog/cpp_phi_inverse/ */ + +static double ndpi_rational_approximation(double t) { + // Abramowitz and Stegun formula 26.2.23. + // The absolute value of the error should be less than 4.5 e-4. + double c[] = { 2.515517, 0.802853, 0.010328 }; + double d[] = { 1.432788, 0.189269, 0.001308 }; + + return(t - ((c[2]*t + c[1])*t + c[0]) / (((d[2]*t + d[1])*t + d[0])*t + 1.0)); +} + +static double ndpi_normal_cdf_inverse(double p) { + if(p <= 0.0 || p >= 1.0) + return(0); /* Invalid argument: valid range 0 < X < 1 */ + + if(p < 0.5) { + // F^-1(p) = - G^-1(p) + return -ndpi_rational_approximation( sqrt(-2.0*log(p)) ); + } else { + // F^-1(p) = G^-1(1-p) + return ndpi_rational_approximation( sqrt(-2.0*log(1-p)) ); + } +} + +double ndpi_avg_inline(u_int32_t *v, u_int num) { + double avg = 0; + u_int i; + + for(i=0; i<num; i++) + avg += v[i]; + + return(avg / (u_int32_t)num); +} + +/* *********************************************************** */ + +/* + Initializes Holt-Winters with Confidence Interval + + Input + hw: Datastructure to initialize and that needs tobe freed with ndpi_hw_free() + num_periods Number of observations of a season, or in ML-parlance the number of points that are required to make the forecast + additive If set to 1 will use the Holt-Winters additive seasonal (should be the default), otherwise the multiplicative seasonal. + alpha Level: specifies the coefficient for the level smoothing. Range 0..1. The higher α, the faster the method forgets old values + beta Trend: specifies the coefficient for the trend smoothing. Range 0..1. + gamma Seasonal: specifies the coefficient for the seasonal smoothing. Range 0..1. With gamma = 0, seaasonal correction is not used. + + significance Significance level for the forecats sed for computing lower and upper bands. Range 0..1. Typical values 0.05 or less. + See https://en.wikipedia.org/wiki/Statistical_significance + + NOTE (See https://otexts.com/fpp2/holt-winters.html) + The additive method is preferred when the seasonal variations are roughly constant through the series, + while the multiplicative method is preferred when the seasonal variations are changing proportional to the level of the series. + + For learning more about timeseries forecasting see + https://www.real-statistics.com/time-series-analysis/basic-time-series-forecasting/ + */ + +int ndpi_hw_init(struct ndpi_hw_struct *hw, + u_int16_t num_periods, u_int8_t additive_seeasonal, + double alpha, double beta, double gamma, float significance) { + memset(hw, 0, sizeof(struct ndpi_hw_struct)); + + hw->params.num_season_periods = num_periods; + hw->params.alpha = alpha; + hw->params.beta = beta; + hw->params.gamma = gamma; + hw->params.use_hw_additive_seasonal = additive_seeasonal; + + if((significance < 0) || (significance > 1)) significance = 0.05; + hw->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.)); + + if((hw->y = (u_int32_t*)ndpi_calloc(hw->params.num_season_periods, sizeof(u_int32_t))) == NULL) + return(-1); + + if((hw->s = (double*)ndpi_calloc(hw->params.num_season_periods, sizeof(double))) == NULL) { + free(hw->y); + return(-1); + } + + return(0); +} + +/* *********************************************************** */ + +/* Frees the memory allocated by ndpi_hw_init() */ +void ndpi_hw_free(struct ndpi_hw_struct *hw) { + if(hw->y) ndpi_free(hw->y); + if(hw->s) ndpi_free(hw->s); +} + +/* *********************************************************** */ + +/* + Returns the forecast and the band (forecast +/- band are the upper and lower values) + + Input + hw: Datastructure previously initialized + value The value to add to the measurement + + Output + forecast The forecasted value + confidence_band The value +/- on which the value should fall is not an anomaly + + Return code + 0 Too early: we're still in the learning phase. Output values are zero. + 1 Normal processing: forecast and confidence_band are meaningful +*/ +int ndpi_hw_add_value(struct ndpi_hw_struct *hw, const u_int32_t _value, double *forecast, double *confidence_band) { + if(hw->num_values < hw->params.num_season_periods) { + hw->y[hw->num_values++] = _value; + + if(hw->num_values == hw->params.num_season_periods) { + double avg = ndpi_avg_inline(hw->y, hw->params.num_season_periods); + u_int i; + + for(i=0; i<hw->params.num_season_periods; i++) + hw->s[i] = hw->y[i] / avg; + + hw->u = _value / hw->s[hw->params.num_season_periods-1]; + hw->v = 0; + } + + *forecast = 0; + *confidence_band = 0; + + return(0); /* Too early still training... */ + } else { + u_int idx = hw->num_values % hw->params.num_season_periods; + double prev_u = hw->u; + double prev_v = hw->v; + double prev_s = hw->s[idx]; + double value = (double)_value;; + double error; + + hw->u = ((hw->params.alpha * value) / prev_s) + (1 - hw->params.alpha) * (hw->u + hw->v); + hw->v = (hw->params.beta * (hw->u - prev_u)) + ((1 - hw->params.beta) * hw->v); + hw->s[idx] = (hw->params.gamma * (value / hw->u)) + ((1 - hw->params.gamma) * prev_s); + + hw->num_values++, idx = (idx + 1) % hw->params.num_season_periods; + + if(hw->params.use_hw_additive_seasonal) + *forecast = (prev_u + prev_v) + prev_s; + else + *forecast = (prev_u + prev_v) * prev_s; + + error = value - *forecast; + hw->sum_square_error += error * error; + *confidence_band = hw->params.ro * (sqrt(hw->sum_square_error / (hw->num_values - hw->params.num_season_periods))); + + return(1); /* We're in business: forecast is meaningful now */ + } +} |