aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2023-05-19 11:46:03 +0200
committerLuca Deri <deri@ntop.org>2023-05-19 11:46:03 +0200
commit5ca6f0ac62d6b2c346bc99d2a1b1200fe9df7917 (patch)
treed4ba9b547f52c27e8a7f3a65cbb1a0cd1934ca0d
parenta8b3baf2b8d7f4fffadc7d35a335ddd2c76770c2 (diff)
Implemented ndpi_predict_linear() for predicting a timeseries value overtime
-rw-r--r--example/ndpiReader.c15
-rw-r--r--src/include/ndpi_api.h16
-rw-r--r--src/lib/ndpi_analyze.c77
3 files changed, 90 insertions, 18 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index efa5bd666..e43211ddd 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -5150,6 +5150,20 @@ void zscoreUnitTest() {
/* *********************************************** */
+void linearUnitTest() {
+ u_int32_t values[] = {15, 27, 38, 49, 68, 72, 90, 150, 175, 203};
+ u_int32_t prediction;
+ u_int32_t const num = NDPI_ARRAY_LENGTH(values);
+ bool do_trace = false;
+ int rc = ndpi_predict_linear(values, num, 2*num, &prediction);
+
+ if(do_trace) {
+ printf("[rc: %d][predicted value: %u]\n", rc, prediction);
+ }
+}
+
+/* *********************************************** */
+
/**
@brief MAIN FUNCTION
**/
@@ -5189,6 +5203,7 @@ int main(int argc, char **argv) {
exit(0);
#endif
+ linearUnitTest();
zscoreUnitTest();
sesUnitTest();
desUnitTest();
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 7c5523bd6..ecb6c8271 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -1833,6 +1833,22 @@ extern "C" {
/* ******************************* */
+ /*
+ * Predicts a value using simple linear regression
+ * Z-Score = (Value - Mean) / StdDev
+ *
+ * @par values = pointer to the individual values to be analyzed [in]
+ * @par num_values = number of 'values' [in]
+ * @par predict_periods = number of periods for which we want to make the prediction [in]
+ * @par prediction = predicted value after 'predict_periods' [out]
+ *
+ * @return The number of outliers found
+ */
+ int ndpi_predict_linear(u_int32_t *values, u_int32_t num_values,
+ u_int32_t predict_periods, u_int32_t *prediction);
+
+ /* ******************************* */
+
u_int32_t ndpi_quick_16_byte_hash(u_int8_t *in_16_bytes_long);
/* ******************************* */
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index 5cd901c75..1b3b9cc4f 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -129,7 +129,7 @@ void ndpi_data_add_value(struct ndpi_analyze_struct *s, const u_int32_t value) {
float ndpi_data_average(struct ndpi_analyze_struct *s) {
if((!s) || (s->num_data_entries == 0))
return(0);
-
+
return((s->num_data_entries == 0) ? 0 : ((float)s->sum_total / (float)s->num_data_entries));
}
@@ -157,7 +157,7 @@ float ndpi_data_variance(struct ndpi_analyze_struct *s) {
return(0);
float v = s->num_data_entries ?
((float)s->stddev.sum_square_total - ((float)s->sum_total * (float)s->sum_total / (float)s->num_data_entries)) / (float)s->num_data_entries : 0.0;
-
+
return((v < 0 /* rounding problem */) ? 0 : v);
}
@@ -176,8 +176,8 @@ float ndpi_data_stddev(struct ndpi_analyze_struct *s) {
/* ********************************************************************************* */
-/*
- Compute the mean on all values
+/*
+ Compute the mean on all values
NOTE: In statistics, there is no difference between the mean and average
*/
float ndpi_data_mean(struct ndpi_analyze_struct *s) {
@@ -695,7 +695,7 @@ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2,
if(threshold && (sum > threshold))
return(-2); /* Sorry they are not similar */
-
+
// printf("%u/%u) [a: %u][b: %u][sum: %u]\n", i, b1->num_bins, a, b, sum);
}
@@ -1144,7 +1144,7 @@ int ndpi_hw_add_value(struct ndpi_hw_struct *hw, const u_int64_t _value, double
double prev_u, prev_v, prev_s, value = (double)_value;
double sq, error, sq_error;
u_int observations;
-
+
if(hw->num_values == hw->params.num_season_periods) {
double avg = ndpi_avg_inline(hw->y, hw->params.num_season_periods);
u_int i;
@@ -1391,7 +1391,7 @@ void ndpi_ses_fitting(double *values, u_int32_t num_values, float *ret_alpha) {
for(alpha=0.1; alpha<0.99; alpha += 0.05) {
struct ndpi_ses_struct ses;
-
+
ndpi_ses_init(&ses, alpha, 0.05);
if(trace)
@@ -1446,7 +1446,7 @@ int ndpi_des_init(struct ndpi_des_struct *des, double alpha, double beta, float
des->params.alpha = alpha;
des->params.beta = beta;
-
+
if((significance < 0) || (significance > 1)) significance = 0.05;
des->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.));
@@ -1460,7 +1460,7 @@ void ndpi_des_reset(struct ndpi_des_struct *des) {
des->num_values = 0;
des->sum_square_error = des->last_forecast = des->last_trend = des->last_value = 0;
}
-
+
/* *********************************************************** */
/*
@@ -1488,15 +1488,15 @@ int ndpi_des_add_value(struct ndpi_des_struct *des, const double _value, double
*forecast = (des->params.alpha * value) + ((1 - des->params.alpha) * (des->last_forecast + des->last_trend));
des->last_trend = (des->params.beta * (*forecast - des->last_forecast)) + ((1 - des->params.beta) * des->last_trend);
}
-
+
error = value - *forecast;
sq_error = error * error;
des->sum_square_error += sq_error, des->prev_error.sum_square_error += sq_error;
-
+
if(des->num_values > 0) {
u_int observations = (des->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? (des->num_values + 1) : ((des->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1);
double sq = sqrt(des->sum_square_error / observations);
-
+
*confidence_band = des->params.ro * sq;
rc = 1;
} else
@@ -1508,12 +1508,12 @@ int ndpi_des_add_value(struct ndpi_des_struct *des, const double _value, double
des->sum_square_error = des->prev_error.sum_square_error;
des->prev_error.num_values_rollup = 0, des->prev_error.sum_square_error = 0;
}
-
+
#ifdef DES_DEBUG
printf("[num_values: %u][[error: %.3f][forecast: %.3f][trend: %.3f[sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n",
des->num_values, error, *forecast, des->last_trend, des->sum_square_error, sq, *confidence_band);
#endif
-
+
return(rc);
}
@@ -1539,7 +1539,7 @@ void ndpi_des_fitting(double *values, u_int32_t num_values, float *ret_alpha, fl
for(beta=0.1; beta<0.99; beta += 0.05) {
for(alpha=0.1; alpha<0.99; alpha += 0.05) {
struct ndpi_des_struct des;
-
+
ndpi_des_init(&des, alpha, beta, 0.05);
if(trace)
@@ -1594,7 +1594,7 @@ u_int ndpi_find_outliers(u_int32_t *values, bool *outliers, u_int32_t num_values
ndpi_init_data_analysis(&a, 3 /* this is the window so we do not need to store values and 3 is enough */);
/* Add values */
- for(i=0; i<num_values; i++)
+ for(i=0; i<num_values; i++)
ndpi_data_add_value(&a, values[i]);
mean = ndpi_data_mean(&a);
@@ -1613,18 +1613,53 @@ u_int ndpi_find_outliers(u_int32_t *values, bool *outliers, u_int32_t num_values
if(is_outlier) ret++;
outliers[i] = is_outlier;
}
-
+
ndpi_free_data_analysis(&a, 0);
return(ret);
}
-/* ************************************************************/
+/* ********************************************************************************* */
+
+/*
+ Simple Linear regression [https://en.wikipedia.org/wiki/Simple_linear_regression]
+ https://www.tutorialspoint.com/c-program-to-compute-linear-regression
+*/
+int ndpi_predict_linear(u_int32_t *values, u_int32_t num_values,
+ u_int32_t predict_periods, u_int32_t *prediction) {
+ u_int i;
+ float m, c, d;
+ float sumx = 0, sumx_square = 0, sumy = 0, sumxy = 0;
+
+ for(i = 0; i < num_values; i++) {
+ float y = values[i];
+ float x = i + 1;
+
+ sumx = sumx+x;
+ sumx_square = sumx_square + (x * x);
+ sumy = sumy + y;
+ sumxy = sumxy + (x * y);
+ }
+
+ d = (num_values * sumx_square) - (sumx * sumx);
+
+ if(d == 0) return(-1);
+
+ m = ((num_values * sumxy) - (sumx * sumy)) / d; /* beta */
+ c = ((sumy * sumx_square) - (sumx * sumxy)) / d; /* alpha */
+
+ *prediction = c + (m * (predict_periods + num_values - 1));
+
+ return(0);
+}
+
+/* ********************************************************************************* */
/* ********************************************************** */
/* http://home.thep.lu.se/~bjorn/crc/crc32_fast.c */
/* ********************************************************** */
+
static uint32_t crc32_for_byte(uint32_t r) {
int j;
@@ -1633,6 +1668,8 @@ static uint32_t crc32_for_byte(uint32_t r) {
return r ^ (uint32_t)0xFF000000L;
}
+/* ********************************************************************************* */
+
/* Any unsigned integer type with at least 32 bits may be used as
* accumulator type for fast crc32-calulation, but unsigned long is
* probably the optimal choice for most systems. */
@@ -1651,6 +1688,8 @@ static void init_tables(uint32_t* table, uint32_t* wtable) {
}
}
+/* ********************************************************************************* */
+
static void __crc32(const void* data, size_t n_bytes, uint32_t* crc) {
static uint32_t table[0x100], wtable[0x100*sizeof(accum_t)];
size_t n_accum = n_bytes/sizeof(accum_t);
@@ -1667,6 +1706,8 @@ static void __crc32(const void* data, size_t n_bytes, uint32_t* crc) {
*crc = table[(uint8_t)*crc ^ ((uint8_t*)data)[i]] ^ *crc >> 8;
}
+/* ********************************************************************************* */
+
u_int32_t ndpi_crc32(const void* data, size_t n_bytes) {
u_int32_t crc = 0;