From 5ca6f0ac62d6b2c346bc99d2a1b1200fe9df7917 Mon Sep 17 00:00:00 2001 From: Luca Deri Date: Fri, 19 May 2023 11:46:03 +0200 Subject: Implemented ndpi_predict_linear() for predicting a timeseries value over time --- src/include/ndpi_api.h | 16 +++++++++++ src/lib/ndpi_analyze.c | 77 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 75 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index 7c5523bd6..ecb6c8271 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -1833,6 +1833,22 @@ extern "C" { /* ******************************* */ + /* + * Predicts a value using simple linear regression + * Model: y = alpha + (beta * x), fitted on 'values' (x = position in the series) + * + * @par values = pointer to the individual values to be analyzed [in] + * @par num_values = number of 'values' [in] + * @par predict_periods = number of periods for which we want to make the prediction [in] + * @par prediction = predicted value after 'predict_periods' [out] + * + * @return Status code of the computation (the predicted value is stored in 'prediction') + */ + int ndpi_predict_linear(u_int32_t *values, u_int32_t num_values, + u_int32_t predict_periods, u_int32_t *prediction); + + /* ******************************* */ + u_int32_t ndpi_quick_16_byte_hash(u_int8_t *in_16_bytes_long); /* ******************************* */ diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c index 5cd901c75..1b3b9cc4f 100644 --- a/src/lib/ndpi_analyze.c +++ b/src/lib/ndpi_analyze.c @@ -129,7 +129,7 @@ void ndpi_data_add_value(struct ndpi_analyze_struct *s, const u_int32_t value) { float ndpi_data_average(struct ndpi_analyze_struct *s) { if((!s) || (s->num_data_entries == 0)) return(0); - + return((s->num_data_entries == 0) ? 0 : ((float)s->sum_total / (float)s->num_data_entries)); } @@ -157,7 +157,7 @@ float ndpi_data_variance(struct ndpi_analyze_struct *s) { return(0); float v = s->num_data_entries ? 
((float)s->stddev.sum_square_total - ((float)s->sum_total * (float)s->sum_total / (float)s->num_data_entries)) / (float)s->num_data_entries : 0.0; - + return((v < 0 /* rounding problem */) ? 0 : v); } @@ -176,8 +176,8 @@ float ndpi_data_stddev(struct ndpi_analyze_struct *s) { /* ********************************************************************************* */ -/* - Compute the mean on all values +/* + Compute the mean on all values NOTE: In statistics, there is no difference between the mean and average */ float ndpi_data_mean(struct ndpi_analyze_struct *s) { @@ -695,7 +695,7 @@ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, if(threshold && (sum > threshold)) return(-2); /* Sorry they are not similar */ - + // printf("%u/%u) [a: %u][b: %u][sum: %u]\n", i, b1->num_bins, a, b, sum); } @@ -1144,7 +1144,7 @@ int ndpi_hw_add_value(struct ndpi_hw_struct *hw, const u_int64_t _value, double double prev_u, prev_v, prev_s, value = (double)_value; double sq, error, sq_error; u_int observations; - + if(hw->num_values == hw->params.num_season_periods) { double avg = ndpi_avg_inline(hw->y, hw->params.num_season_periods); u_int i; @@ -1391,7 +1391,7 @@ void ndpi_ses_fitting(double *values, u_int32_t num_values, float *ret_alpha) { for(alpha=0.1; alpha<0.99; alpha += 0.05) { struct ndpi_ses_struct ses; - + ndpi_ses_init(&ses, alpha, 0.05); if(trace) @@ -1446,7 +1446,7 @@ int ndpi_des_init(struct ndpi_des_struct *des, double alpha, double beta, float des->params.alpha = alpha; des->params.beta = beta; - + if((significance < 0) || (significance > 1)) significance = 0.05; des->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.)); @@ -1460,7 +1460,7 @@ void ndpi_des_reset(struct ndpi_des_struct *des) { des->num_values = 0; des->sum_square_error = des->last_forecast = des->last_trend = des->last_value = 0; } - + /* *********************************************************** */ /* @@ -1488,15 +1488,15 @@ int ndpi_des_add_value(struct ndpi_des_struct *des, 
const double _value, double *forecast = (des->params.alpha * value) + ((1 - des->params.alpha) * (des->last_forecast + des->last_trend)); des->last_trend = (des->params.beta * (*forecast - des->last_forecast)) + ((1 - des->params.beta) * des->last_trend); } - + error = value - *forecast; sq_error = error * error; des->sum_square_error += sq_error, des->prev_error.sum_square_error += sq_error; - + if(des->num_values > 0) { u_int observations = (des->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? (des->num_values + 1) : ((des->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1); double sq = sqrt(des->sum_square_error / observations); - + *confidence_band = des->params.ro * sq; rc = 1; } else @@ -1508,12 +1508,12 @@ int ndpi_des_add_value(struct ndpi_des_struct *des, const double _value, double des->sum_square_error = des->prev_error.sum_square_error; des->prev_error.num_values_rollup = 0, des->prev_error.sum_square_error = 0; } - + #ifdef DES_DEBUG printf("[num_values: %u][[error: %.3f][forecast: %.3f][trend: %.3f[sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n", des->num_values, error, *forecast, des->last_trend, des->sum_square_error, sq, *confidence_band); #endif - + return(rc); } @@ -1539,7 +1539,7 @@ void ndpi_des_fitting(double *values, u_int32_t num_values, float *ret_alpha, fl for(beta=0.1; beta<0.99; beta += 0.05) { for(alpha=0.1; alpha<0.99; alpha += 0.05) { struct ndpi_des_struct des; - + ndpi_des_init(&des, alpha, beta, 0.05); if(trace) @@ -1594,7 +1594,7 @@ u_int ndpi_find_outliers(u_int32_t *values, bool *outliers, u_int32_t num_values ndpi_init_data_analysis(&a, 3 /* this is the window so we do not need to store values and 3 is enough */); /* Add values */ - for(i=0; i> 8; } +/* ********************************************************************************* */ + u_int32_t ndpi_crc32(const void* data, size_t n_bytes) { u_int32_t crc = 0; -- cgit v1.2.3