/*
* ndpi_analyze.c
*
* Copyright (C) 2019-23 - ntop.org and contributors
*
* This file is part of nDPI, an open source deep packet inspection
* library.
*
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* nDPI is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with nDPI. If not, see .
*
*/
#include
#include
#include
#include
#include
#include
#include /* FLT_EPSILON */
#include "ndpi_api.h"
#include "ndpi_config.h"
#include "third_party/include/hll.h"
#include "third_party/include/kdtree.h"
#include "third_party/include/ball.h"
#include "ndpi_replace_printf.h"
/* ********************************************************************************* */
void ndpi_init_data_analysis(struct ndpi_analyze_struct *ret, u_int16_t _max_series_len) {
u_int32_t len;
memset(ret, 0, sizeof(*ret));
if(_max_series_len > MAX_SERIES_LEN) _max_series_len = MAX_SERIES_LEN;
ret->num_values_array_len = _max_series_len;
if(ret->num_values_array_len > 0) {
len = sizeof(u_int64_t) * ret->num_values_array_len;
if((ret->values = ndpi_malloc(len)) != NULL)
memset(ret->values, 0, len);
else
ret->num_values_array_len = 0;
}
}
/* ********************************************************************************* */
struct ndpi_analyze_struct* ndpi_alloc_data_analysis(u_int16_t _max_series_len) {
struct ndpi_analyze_struct *ret = ndpi_malloc(sizeof(struct ndpi_analyze_struct));
if(ret != NULL)
ndpi_init_data_analysis(ret, _max_series_len);
return(ret);
}
/* ********************************************************************************* */
struct ndpi_analyze_struct* ndpi_alloc_data_analysis_from_series(const u_int32_t *values, u_int16_t num_values) {
u_int16_t i;
struct ndpi_analyze_struct *ret = ndpi_alloc_data_analysis(num_values);
if(ret == NULL) return(NULL);
for(i=0; ivalues) ndpi_free(d->values);
if(free_pointer) ndpi_free(d);
}
/* ********************************************************************************* */
void ndpi_reset_data_analysis(struct ndpi_analyze_struct *d) {
u_int64_t *values_bkp;
u_int32_t num_values_array_len_bpk;
if(!d)
return;
values_bkp = d->values;
num_values_array_len_bpk = d->num_values_array_len;
memset(d, 0, sizeof(struct ndpi_analyze_struct));
d->values = values_bkp;
d->num_values_array_len = num_values_array_len_bpk;
if(d->values)
memset(d->values, 0, sizeof(u_int64_t)*d->num_values_array_len);
}
/* ********************************************************************************* */
/*
Add a new point to analyze
*/
void ndpi_data_add_value(struct ndpi_analyze_struct *s, const u_int64_t value) {
if(!s)
return;
if(s->sum_total == 0)
s->min_val = s->max_val = value;
else {
if(value < s->min_val) s->min_val = value;
if(value > s->max_val) s->max_val = value;
}
s->sum_total += value, s->num_data_entries++;
if(s->num_values_array_len) {
s->values[s->next_value_insert_index] = value;
if(++s->next_value_insert_index == s->num_values_array_len)
s->next_value_insert_index = 0;
}
/*
Optimized stddev calculation
https://www.khanacademy.org/math/probability/data-distributions-a1/summarizing-spread-distributions/a/calculating-standard-deviation-step-by-step
https://math.stackexchange.com/questions/683297/how-to-calculate-standard-deviation-without-detailed-historical-data
http://mathcentral.uregina.ca/QQ/database/QQ.09.02/carlos1.html
*/
s->stddev.sum_square_total += (u_int64_t)value * (u_int64_t)value;
}
/* ********************************************************************************* */
/* Compute the average on all values */
float ndpi_data_average(struct ndpi_analyze_struct *s) {
if((!s) || (s->num_data_entries == 0))
return(0);
return((float)s->sum_total / (float)s->num_data_entries);
}
/* ********************************************************************************* */
u_int64_t ndpi_data_last(struct ndpi_analyze_struct *s) {
if((!s) || (s->num_data_entries == 0) || (s->num_values_array_len == 0))
return(0);
if(s->next_value_insert_index == 0)
return(s->values[s->num_values_array_len-1]);
else
return(s->values[s->next_value_insert_index-1]);
}
/* Return min/max on all values */
u_int64_t ndpi_data_min(struct ndpi_analyze_struct *s) { return(s ? s->min_val : 0); }
u_int64_t ndpi_data_max(struct ndpi_analyze_struct *s) { return(s ? s->max_val : 0); }
/* ********************************************************************************* */
/* Compute the variance on all values */
float ndpi_data_variance(struct ndpi_analyze_struct *s) {
if(!s)
return(0);
float v = s->num_data_entries ?
((float)s->stddev.sum_square_total - ((float)s->sum_total * (float)s->sum_total / (float)s->num_data_entries)) / (float)s->num_data_entries : 0.0;
return((v < 0 /* rounding problem */) ? 0 : v);
}
/* ********************************************************************************* */
/*
See the link below for "Population and sample standard deviation review"
https://www.khanacademy.org/math/statistics-probability/summarizing-quantitative-data/variance-standard-deviation-sample/a/population-and-sample-standard-deviation-review
In nDPI we use an approximate stddev calculation to avoid storing all data in memory
*/
/* Compute the standard deviation on all values */
float ndpi_data_stddev(struct ndpi_analyze_struct *s) {
return(sqrt(ndpi_data_variance(s)));
}
/* ********************************************************************************* */
/*
Compute the mean on all values
NOTE: In statistics, there is no difference between the mean and average
*/
float ndpi_data_mean(struct ndpi_analyze_struct *s) {
return(ndpi_data_average(s));
}
/* ********************************************************************************* */
/* Compute the average only on the sliding window */
float ndpi_data_window_average(struct ndpi_analyze_struct *s) {
if(s && s->num_values_array_len) {
float sum = 0.0;
u_int16_t i, n = ndpi_min(s->num_data_entries, s->num_values_array_len);
if(n == 0)
return(0);
for(i=0; ivalues[i];
return((float)sum / (float)n);
} else
return(0);
}
/* ********************************************************************************* */
/* Compute the variance only on the sliding window */
float ndpi_data_window_variance(struct ndpi_analyze_struct *s) {
if(s && s->num_values_array_len) {
float sum = 0.0, avg = ndpi_data_window_average(s);
u_int16_t i, n = ndpi_min(s->num_data_entries, s->num_values_array_len);
if(n == 0)
return(0);
for(i=0; ivalues[i]-avg, 2);
return((float)sum / (float)n);
} else
return(0);
}
/* ********************************************************************************* */
/* Compute the variance only on the sliding window */
float ndpi_data_window_stddev(struct ndpi_analyze_struct *s) {
return(sqrt(ndpi_data_window_variance(s)));
}
/* ********************************************************************************* */
/*
Compute entropy on the last sliding window values
*/
float ndpi_data_entropy(struct ndpi_analyze_struct *s) {
if(s && s->num_values_array_len) {
int i;
float sum = 0.0, total = 0.0;
for(i=0; inum_values_array_len; i++)
total += s->values[i];
if(fpclassify(total) == FP_ZERO)
return(0);
for (i=0; inum_values_array_len; i++) {
float tmp = (float)s->values[i] / (float)total;
if(tmp > FLT_EPSILON)
sum -= tmp * logf(tmp);
}
return(sum / logf(2.0));
} else
return(0);
}
/* ********************************************************************************* */
void ndpi_data_print_window_values(struct ndpi_analyze_struct *s) {
if(s && s->num_values_array_len) {
u_int16_t i, n = ndpi_min(s->num_data_entries, s->num_values_array_len);
for(i=0; ivalues[i]);
printf("\n");
}
}
/* ********************************************************************************* */
/*
Upload / download ration
-1 Download
0 Mixed
1 Upload
*/
float ndpi_data_ratio(u_int32_t sent, u_int32_t rcvd) {
float s = (float)((int64_t)sent + (int64_t)rcvd);
float d = (float)((int64_t)sent - (int64_t)rcvd);
return((s == 0) ? 0 : (d/s));
}
/* ********************************************************************************* */
const char* ndpi_data_ratio2str(float ratio) {
if(ratio < -0.2) return("Download");
else if(ratio > 0.2) return("Upload");
else return("Mixed");
}
/* ********************************************************************************* */
/* ********************************************************************************* */
int ndpi_hll_init(struct ndpi_hll *hll, u_int8_t bits) {
return(hll_init(hll, bits));
}
void ndpi_hll_destroy(struct ndpi_hll *hll) {
hll_destroy(hll);
}
void ndpi_hll_reset(struct ndpi_hll *hll) {
hll_reset(hll);
}
int ndpi_hll_add(struct ndpi_hll *hll, const char *data, size_t data_len) {
return(hll_add(hll, (const void *)data, data_len));
}
/* 1 = rank changed, 0 = no changes in rank */
int ndpi_hll_add_number(struct ndpi_hll *hll, u_int32_t value) {
return(hll_add(hll, (const void *)&value, sizeof(value)));
}
double ndpi_hll_count(struct ndpi_hll *hll) {
return(hll_count(hll));
}
/* ********************************************************************************* */
/* ********************************************************************************* */
int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int16_t num_bins) {
if(!b)
return(-1);
b->num_bins = num_bins, b->family = f, b->is_empty = 1;
switch(f) {
case ndpi_bin_family8:
if((b->u.bins8 = (u_int8_t*)ndpi_calloc(num_bins, sizeof(u_int8_t))) == NULL)
return(-1);
break;
case ndpi_bin_family16:
if((b->u.bins16 = (u_int16_t*)ndpi_calloc(num_bins, sizeof(u_int16_t))) == NULL)
return(-1);
break;
case ndpi_bin_family32:
if((b->u.bins32 = (u_int32_t*)ndpi_calloc(num_bins, sizeof(u_int32_t))) == NULL)
return(-1);
break;
case ndpi_bin_family64:
if((b->u.bins64 = (u_int64_t*)ndpi_calloc(num_bins, sizeof(u_int64_t))) == NULL)
return(-1);
break;
}
return(0);
}
/* ********************************************************************************* */
void ndpi_free_bin(struct ndpi_bin *b) {
if(!b || !b->u.bins8)
return;
switch(b->family) {
case ndpi_bin_family8:
ndpi_free(b->u.bins8);
break;
case ndpi_bin_family16:
ndpi_free(b->u.bins16);
break;
case ndpi_bin_family32:
ndpi_free(b->u.bins32);
break;
case ndpi_bin_family64:
ndpi_free(b->u.bins64);
break;
}
}
/* ********************************************************************************* */
struct ndpi_bin* ndpi_clone_bin(struct ndpi_bin *b) {
struct ndpi_bin *out;
if(!b || !b->u.bins8) return(NULL);
out = (struct ndpi_bin*)ndpi_malloc(sizeof(struct ndpi_bin));
if(!out) return(NULL);
out->num_bins = b->num_bins, out->family = b->family, out->is_empty = b->is_empty;
switch(out->family) {
case ndpi_bin_family8:
if((out->u.bins8 = (u_int8_t*)ndpi_calloc(out->num_bins, sizeof(u_int8_t))) == NULL) {
ndpi_free(out);
return(NULL);
} else
memcpy(out->u.bins8, b->u.bins8, out->num_bins*sizeof(u_int8_t));
break;
case ndpi_bin_family16:
if((out->u.bins16 = (u_int16_t*)ndpi_calloc(out->num_bins, sizeof(u_int16_t))) == NULL) {
ndpi_free(out);
return(NULL);
} else
memcpy(out->u.bins16, b->u.bins16, out->num_bins*sizeof(u_int16_t));
break;
case ndpi_bin_family32:
if((out->u.bins32 = (u_int32_t*)ndpi_calloc(out->num_bins, sizeof(u_int32_t))) == NULL) {
ndpi_free(out);
return(NULL);
} else
memcpy(out->u.bins32, b->u.bins32, out->num_bins*sizeof(u_int32_t));
break;
case ndpi_bin_family64:
if((out->u.bins64 = (u_int64_t*)ndpi_calloc(out->num_bins, sizeof(u_int64_t))) == NULL) {
ndpi_free(out);
return(NULL);
} else
memcpy(out->u.bins64, b->u.bins64, out->num_bins*sizeof(u_int64_t));
break;
}
return(out);
}
/* ********************************************************************************* */
void ndpi_set_bin(struct ndpi_bin *b, u_int16_t slot_id, u_int64_t val) {
if(!b || !b->u.bins8 || b->num_bins == 0)
return;
if(slot_id >= b->num_bins) slot_id = b->num_bins - 1;
switch(b->family) {
case ndpi_bin_family8:
b->u.bins8[slot_id] = (u_int8_t)val;
break;
case ndpi_bin_family16:
b->u.bins16[slot_id] = (u_int16_t)val;
break;
case ndpi_bin_family32:
b->u.bins32[slot_id] = (u_int32_t)val;
break;
case ndpi_bin_family64:
b->u.bins64[slot_id] = (u_int64_t)val;
break;
}
}
/* ********************************************************************************* */
void ndpi_inc_bin(struct ndpi_bin *b, u_int16_t slot_id, u_int64_t val) {
if(!b || !b->u.bins8 || b->num_bins == 0)
return;
b->is_empty = 0;
if(slot_id >= b->num_bins) slot_id = b->num_bins - 1;
switch(b->family) {
case ndpi_bin_family8:
b->u.bins8[slot_id] += (u_int8_t)val;
break;
case ndpi_bin_family16:
b->u.bins16[slot_id] += (u_int16_t)val;
break;
case ndpi_bin_family32:
b->u.bins32[slot_id] += (u_int32_t)val;
break;
case ndpi_bin_family64:
b->u.bins64[slot_id] += (u_int64_t)val;
break;
}
}
/* ********************************************************************************* */
u_int64_t ndpi_get_bin_value(struct ndpi_bin *b, u_int16_t slot_id) {
if(!b || !b->u.bins8 || b->num_bins == 0)
return(0);
if(slot_id >= b->num_bins) slot_id = b->num_bins - 1;
switch(b->family) {
case ndpi_bin_family8:
return(b->u.bins8[slot_id]);
case ndpi_bin_family16:
return(b->u.bins16[slot_id]);
case ndpi_bin_family32:
return(b->u.bins32[slot_id]);
case ndpi_bin_family64:
return(b->u.bins64[slot_id]);
}
return(0);
}
/* ********************************************************************************* */
void ndpi_reset_bin(struct ndpi_bin *b) {
if(!b || !b->u.bins8)
return;
b->is_empty = 1;
switch(b->family) {
case ndpi_bin_family8:
memset(b->u.bins8, 0, sizeof(u_int8_t)*b->num_bins);
break;
case ndpi_bin_family16:
memset(b->u.bins16, 0, sizeof(u_int16_t)*b->num_bins);
break;
case ndpi_bin_family32:
memset(b->u.bins32, 0, sizeof(u_int32_t)*b->num_bins);
break;
case ndpi_bin_family64:
memset(b->u.bins64, 0, sizeof(u_int64_t)*b->num_bins);
break;
}
}
/* ********************************************************************************* */
/*
Each bin slot is transformed in a % with respect to the value total
*/
void ndpi_normalize_bin(struct ndpi_bin *b) {
u_int16_t i;
u_int32_t tot = 0;
if(!b || b->is_empty) return;
switch(b->family) {
case ndpi_bin_family8:
for(i=0; inum_bins; i++) tot += b->u.bins8[i];
if(tot > 0) {
for(i=0; inum_bins; i++)
b->u.bins8[i] = (b->u.bins8[i]*100) / tot;
}
break;
case ndpi_bin_family16:
for(i=0; inum_bins; i++) tot += b->u.bins16[i];
if(tot > 0) {
for(i=0; inum_bins; i++)
b->u.bins16[i] = (b->u.bins16[i]*100) / tot;
}
break;
case ndpi_bin_family32:
for(i=0; inum_bins; i++) tot += b->u.bins32[i];
if(tot > 0) {
for(i=0; inum_bins; i++)
b->u.bins32[i] = (b->u.bins32[i]*100) / tot;
}
break;
case ndpi_bin_family64:
for(i=0; inum_bins; i++) tot += b->u.bins64[i];
if(tot > 0) {
for(i=0; inum_bins; i++)
b->u.bins64[i] = (b->u.bins64[i]*100) / tot;
}
break;
}
}
/* ********************************************************************************* */
char* ndpi_print_bin(struct ndpi_bin *b, u_int8_t normalize_first, char *out_buf, u_int out_buf_len) {
u_int16_t i;
u_int len = 0;
if(!b || !b->u.bins8 || !out_buf) return(out_buf); else out_buf[0] = '\0';
if(normalize_first)
ndpi_normalize_bin(b);
switch(b->family) {
case ndpi_bin_family8:
for(i=0; inum_bins; i++) {
int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%u", (i > 0) ? "," : "", b->u.bins8[i]);
if(rc < 0 || (u_int)rc >= out_buf_len-len) break;
len += rc;
}
break;
case ndpi_bin_family16:
for(i=0; inum_bins; i++) {
int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%u", (i > 0) ? "," : "", b->u.bins16[i]);
if(rc < 0 || (u_int)rc >= out_buf_len-len) break;
len += rc;
}
break;
case ndpi_bin_family32:
for(i=0; inum_bins; i++) {
int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%u", (i > 0) ? "," : "", b->u.bins32[i]);
if(rc < 0 || (u_int)rc >= out_buf_len-len) break;
len += rc;
}
break;
case ndpi_bin_family64:
for(i=0; inum_bins; i++) {
int rc = ndpi_snprintf(&out_buf[len], out_buf_len-len, "%s%llu", (i > 0) ? "," : "", (unsigned long long)b->u.bins64[i]);
if(rc < 0 || (u_int)rc >= out_buf_len-len) break;
len += rc;
}
break;
}
return(out_buf);
}
/* ********************************************************************************* */
// #define COSINE_SIMILARITY
/*
Determines how similar are two bins
Cosine Similiarity
0 = Very differet
... (gray zone)
1 = Alike
See https://en.wikipedia.org/wiki/Cosine_similarity for more details
---
Euclidean similarity
0 = alike
...
the higher the more different
if similarity_max_threshold != 0, we assume that bins arent similar
*/
float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2,
u_int8_t normalize_first, float similarity_max_threshold) {
u_int16_t i;
float threshold = similarity_max_threshold*similarity_max_threshold;
if(!b1 || !b2)
return(-1);
if(
// (b1->family != b2->family) ||
(b1->num_bins != b2->num_bins))
return(-1);
if(normalize_first)
ndpi_normalize_bin(b1), ndpi_normalize_bin(b2);
#ifdef COSINE_SIMILARITY
{
u_int32_t sumxx = 0, sumxy = 0, sumyy = 0;
for(i=0; inum_bins; i++) {
u_int32_t a = ndpi_get_bin_value(b1, i);
u_int32_t b = ndpi_get_bin_value(b2, i);
sumxx += a*a, sumyy += b*b, sumxy += a*b;
}
if((sumxx == 0) || (sumyy == 0))
return(0);
else
return((float)sumxy / sqrt((float)(sumxx * sumyy)));
}
#else
{
double sum = 0;
for(i=0; inum_bins; i++) {
u_int32_t a = ndpi_get_bin_value(b1, i);
u_int32_t b = ndpi_get_bin_value(b2, i);
u_int32_t diff = (a > b) ? (a - b) : (b - a);
if(a != b) sum += pow(diff, 2);
if(threshold && (sum > threshold))
return(-2); /* Sorry they are not similar */
// printf("%u/%u) [a: %u][b: %u][sum: %u]\n", i, b1->num_bins, a, b, sum);
}
/* The lower the more similar */
return(sqrt(sum));
}
#endif
}
/* ********************************************************************************* */
//#define DEBUG_CLUSTER_BINS
#define MAX_NUM_CLUSTERS 128
/*
Clusters bins into 'num_clusters'
- (in) bins: a vection 'num_bins' long of bins to cluster
- (in) 'num_clusters': number of desired clusters 0...(num_clusters-1)
- (out) 'cluster_ids': a vector 'num_bins' long containing the id's of each clustered bin
- (out) 'centroids': an optional 'num_clusters' long vector of (centroid) bins
See
- https://en.wikipedia.org/wiki/K-means_clustering
*/
int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins,
u_int8_t num_clusters, u_int16_t *cluster_ids,
struct ndpi_bin *centroids) {
u_int16_t i, j, max_iterations = 25, num_iterations, num_moves;
u_int8_t alloc_centroids = 0;
char out_buf[256];
float *bin_score;
u_int16_t num_cluster_elems[MAX_NUM_CLUSTERS] = { 0 };
(void)out_buf;
srand(time(NULL));
if(!bins || num_bins == 0 || !cluster_ids || num_clusters == 0)
return(-1);
if(num_clusters > num_bins) num_clusters = num_bins;
if(num_clusters > MAX_NUM_CLUSTERS) num_clusters = MAX_NUM_CLUSTERS;
#ifdef DEBUG_CLUSTER_BINS
printf("Distributing %u bins over %u clusters\n", num_bins, num_clusters);
#endif
if((bin_score = (float*)ndpi_calloc(num_bins, sizeof(float))) == NULL)
return(-2);
if(centroids == NULL) {
alloc_centroids = 1;
if((centroids = (struct ndpi_bin*)ndpi_malloc(sizeof(struct ndpi_bin)*num_clusters)) == NULL) {
ndpi_free(bin_score);
return(-2);
} else {
for(i=0; i best_similarity) {
cluster_id = j, best_similarity = similarity;
}
#else
if(similarity < best_similarity) {
cluster_id = j, best_similarity = similarity;
}
#endif
}
if((best_similarity == current_similarity) && (num_cluster_elems[cluster_ids[i]] > 1)) {
/*
In case of identical similarity let's leave things as they are
this unless this is a cluster with only one element
*/
cluster_id = cluster_ids[i];
}
bin_score[i] = best_similarity;
if(cluster_ids[i] != cluster_id) {
#ifdef DEBUG_CLUSTER_BINS
printf("Moved bin %u from cluster %u -> %u [similarity: %f]\n",
i, cluster_ids[i], cluster_id, best_similarity);
#endif
num_cluster_elems[cluster_ids[i]]--;
num_cluster_elems[cluster_id]++;
cluster_ids[i] = cluster_id;
num_moves++;
}
}
if(num_moves == 0)
break;
#ifdef DEBUG_CLUSTER_BINS
for(j=0; j 1))
score = bin_score[i], candidate = i;
}
#else
score = 0;
for(i=0; i score) && (num_cluster_elems[cluster_ids[i]] > 1))
score = bin_score[i], candidate = i;
}
#endif
if(verbose)
printf("Rebalance: moving bin %u from cluster %u -> %u [similarity: %f]\n",
candidate, cluster_ids[candidate], j, score);
num_cluster_elems[cluster_ids[candidate]]--;
num_cluster_elems[j]++;
cluster_ids[candidate] = j;
}
}
#endif
} /* while(...) */
if(alloc_centroids) {
for(i=0; iempty = 1, s->num_values = num_learning_values;
s->gains = (u_int32_t*)ndpi_calloc(num_learning_values, sizeof(u_int32_t));
s->losses = (u_int32_t*)ndpi_calloc(num_learning_values, sizeof(u_int32_t));
if(s->gains && s->losses) {
s->last_value = 0;
return(0);
} else {
if(s->gains) ndpi_free(s->gains);
if(s->losses) ndpi_free(s->losses);
return(-1);
}
}
/* ************************************* */
void ndpi_free_rsi(struct ndpi_rsi_struct *s) {
ndpi_free(s->gains), ndpi_free(s->losses);
}
/* ************************************* */
// #define DEBUG_RSI
/*
This function adds a new value and returns the computed RSI, or -1
if there are too few points (< num_learning_values)
RSI < 30 (too many losses)
RSI > 70 (too many gains)
*/
float ndpi_rsi_add_value(struct ndpi_rsi_struct *s, const u_int32_t value) {
float relative_strength;
if(!s->empty) {
u_int32_t val;
s->total_gains -= s->gains[s->next_index], s->total_losses -= s->losses[s->next_index];
if(value > s->last_value) {
val = value - s->last_value;
s->gains[s->next_index] = val, s->losses[s->next_index] = 0;
s->total_gains += val;
#ifdef DEBUG_RSI
printf("Gain: %u\n", val);
#endif
} else {
val = s->last_value - value;
s->losses[s->next_index] = val, s->gains[s->next_index] = 0;
s->total_losses += val;
#ifdef DEBUG_RSI
printf("Loss: %u\n", val);
#endif
}
#ifdef DEBUG_RSI
printf("[value: %u][total_gains: %u][total_losses: %u][cur_idx: %u]\n", value, s->total_gains, s->total_losses, s->next_index);
#endif
}
s->last_value = value, s->next_index = (s->next_index + 1) % s->num_values, s->empty = 0;
if(s->next_index == 0) s->rsi_ready = 1; /* We have completed one round */
if(!s->rsi_ready)
return(-1); /* Too early */
else if(s->total_losses == 0) /* Avoid division by zero (**) */
return(100.);
else {
relative_strength = (float)s->total_gains / (float)s->total_losses; /* (**) */
#ifdef DEBUG_RSI
printf("RSI: %f\n", relative_strength);
#endif
return(100. - (100. / (1. + relative_strength)));
}
}
/* *********************************************************** */
/* https://www.johndcook.com/blog/cpp_phi_inverse/ */
static double ndpi_rational_approximation(double t) {
// Abramowitz and Stegun formula 26.2.23.
// The absolute value of the error should be less than 4.5 e-4.
double c[] = { 2.515517, 0.802853, 0.010328 };
double d[] = { 1.432788, 0.189269, 0.001308 };
return(t - ((c[2]*t + c[1])*t + c[0]) / (((d[2]*t + d[1])*t + d[0])*t + 1.0));
}
static double ndpi_normal_cdf_inverse(double p) {
if(p <= 0.0 || p >= 1.0)
return(0); /* Invalid argument: valid range 0 < X < 1 */
if(p < 0.5) {
// F^-1(p) = - G^-1(p)
return -ndpi_rational_approximation( sqrt(-2.0*log(p)) );
} else {
// F^-1(p) = G^-1(1-p)
return ndpi_rational_approximation( sqrt(-2.0*log(1-p)) );
}
}
double ndpi_avg_inline(u_int64_t *v, u_int num) {
double avg = 0;
u_int i;
for(i=0; iparams.num_season_periods = num_periods + 1;
hw->params.alpha = alpha;
hw->params.beta = beta;
hw->params.gamma = gamma;
hw->params.use_hw_additive_seasonal = additive_seeasonal;
if((significance < 0) || (significance > 1)) significance = 0.05;
hw->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.));
if((hw->y = (u_int64_t*)ndpi_calloc(hw->params.num_season_periods, sizeof(u_int64_t))) == NULL)
return(-1);
if((hw->s = (double*)ndpi_calloc(hw->params.num_season_periods, sizeof(double))) == NULL) {
ndpi_free(hw->y);
hw->y = NULL;
return(-1);
}
return(0);
}
/* *********************************************************** */
/* Frees the memory allocated by ndpi_hw_init() */
void ndpi_hw_free(struct ndpi_hw_struct *hw) {
if(hw->y) ndpi_free(hw->y);
if(hw->s) ndpi_free(hw->s);
}
/* *********************************************************** */
/*
Returns the forecast and the band (forecast +/- band are the upper and lower values)
Input
hw: Datastructure previously initialized
value The value to add to the measurement
Output
forecast The forecasted value
confidence_band The value +/- on which the value should fall is not an anomaly
Return code
0 Too early: we're still in the learning phase. Output values are zero.
1 Normal processing: forecast and confidence_band are meaningful
*/
int ndpi_hw_add_value(struct ndpi_hw_struct *hw, const u_int64_t _value, double *forecast, double *confidence_band) {
if(hw->num_values < hw->params.num_season_periods) {
hw->y[hw->num_values++] = _value;
*forecast = 0;
*confidence_band = 0;
return(0); /* Too early still training... */
} else {
u_int idx = hw->num_values % hw->params.num_season_periods;
double prev_u, prev_v, prev_s, value = (double)_value;
double sq, error, sq_error;
u_int observations;
if(hw->num_values == hw->params.num_season_periods) {
double avg = ndpi_avg_inline(hw->y, hw->params.num_season_periods);
u_int i;
if(avg == 0) avg = 1; /* Avoid divisions by zero */
for(i=0; iparams.num_season_periods; i++)
hw->s[i] = hw->y[i] / avg;
i = hw->params.num_season_periods-1;
if(hw->s[i] == 0)
hw->u = 0;
else
hw->u = _value / hw->s[i];
hw->v = 0;
ndpi_free(hw->y);
hw->y = NULL;
}
idx = hw->num_values % hw->params.num_season_periods;
prev_u = hw->u, prev_v = hw->v, prev_s = hw->s[idx];
if(prev_s != 0)
hw->u = ((hw->params.alpha * value) / prev_s) + ( 1 - hw->params.alpha) * (hw->u + hw->v);
else
hw->u = 0; /* Avoid divisions by zero */
hw->v = (hw->params.beta * (hw->u - prev_u)) + ((1 - hw->params.beta ) * hw->v);
if(hw->u != 0)
hw->s[idx] = (hw->params.gamma * (value / hw->u)) + ((1 - hw->params.gamma) * prev_s);
else
hw->s[idx] = 0; /* Avoid divisions by zero */
if(hw->params.use_hw_additive_seasonal)
*forecast = (prev_u + prev_v) + prev_s;
else
*forecast = (prev_u + prev_v) * prev_s;
error = value - *forecast;
sq_error = error * error;
hw->sum_square_error += sq_error, hw->prev_error.sum_square_error += sq_error;
observations = (hw->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? hw->num_values : ((hw->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1);
sq = sqrt(hw->sum_square_error / observations);
*confidence_band = hw->params.ro * sq;
#ifdef HW_DEBUG
printf("[num_values: %u][u: %.3f][v: %.3f][s: %.3f][error: %.3f][forecast: %.3f][sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n",
hw->num_values, hw->u, hw->v, hw->s[idx], error,
*forecast, hw->sum_square_error,
sq, *confidence_band);
#endif
hw->num_values++, idx = (idx + 1) % hw->params.num_season_periods;
if(++hw->prev_error.num_values_rollup == MAX_SQUARE_ERROR_ITERATIONS) {
hw->sum_square_error = hw->prev_error.sum_square_error;
hw->prev_error.num_values_rollup = 0, hw->prev_error.sum_square_error = 0;
}
return(1); /* We're in business: forecast is meaningful now */
}
}
/* *********************************************************** */
void ndpi_hw_reset(struct ndpi_hw_struct *hw) {
hw->prev_error.sum_square_error = 0, hw->prev_error.num_values_rollup = 0;
hw->num_values = 0;
hw->u = hw->v = hw->sum_square_error = 0;
if(hw->y)
memset(hw->y, 0, (hw->params.num_season_periods * sizeof(u_int64_t)));
if(hw->s)
memset(hw->s, 0, (hw->params.num_season_periods * sizeof(double)));
}
/* ********************************************************************************* */
/* ********************************************************************************* */
/*
Jitter calculator
Used to determine how noisy is a signal
*/
int ndpi_jitter_init(struct ndpi_jitter_struct *s, u_int16_t num_learning_values) {
if(!s)
return(-1);
memset(s, 0, sizeof(struct ndpi_jitter_struct));
if(num_learning_values < 2) num_learning_values = 2;
s->empty = 1, s->num_values = num_learning_values;
s->observations = (float*)ndpi_calloc(num_learning_values, sizeof(float));
if(s->observations) {
s->last_value = 0;
return(0);
} else
return(-1);
}
/* ************************************* */
void ndpi_jitter_free(struct ndpi_jitter_struct *s) {
ndpi_free(s->observations);
}
/* ************************************* */
/*
This function adds a new value and returns the computed Jitter
*/
float ndpi_jitter_add_value(struct ndpi_jitter_struct *s, const float value) {
float val = fabsf(value - s->last_value);
if(s->empty && (s->next_index == 0))
; /* Skip the first value as we are unable to calculate the difference */
else {
s->jitter_total -= s->observations[s->next_index];
s->observations[s->next_index] = val;
s->jitter_total += val;
}
s->last_value = value, s->next_index = (s->next_index + 1) % s->num_values;
if(s->next_index == 0) s->jitter_ready = 1; /* We have completed one round */
#ifdef DEBUG_JITTER
printf("[JITTER] [value: %.3f][diff: %.3f][jitter_total: %.3f] -> %.3f\n",
value, val, s->jitter_total,
s->jitter_ready ? (s->jitter_total / s->num_values) : -1);
#endif
if(!s->jitter_ready)
return(-1); /* Too early */
else
return(s->jitter_total / s->num_values);
}
/* *********************************************************** */
/* *********************************************************** */
/*
Single Exponential Smoothing
*/
int ndpi_ses_init(struct ndpi_ses_struct *ses, double alpha, float significance) {
if(!ses)
return(-1);
memset(ses, 0, sizeof(struct ndpi_ses_struct));
ses->params.alpha = alpha;
if((significance < 0) || (significance > 1)) significance = 0.05;
ses->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.));
return(0);
}
/* *********************************************************** */
/*
Returns the forecast and the band (forecast +/- band are the upper and lower values)
Input
ses: Datastructure previously initialized
value The value to add to the measurement
Output
forecast The forecasted value
confidence_band The value +/- on which the value should fall is not an anomaly
Return code
0 Too early: we're still in the learning phase. Output values are zero.
1 Normal processing: forecast and confidence_band are meaningful
*/
int ndpi_ses_add_value(struct ndpi_ses_struct *ses, const double _value, double *forecast, double *confidence_band) {
double value = (double)_value, error, sq_error;
int rc;
if(ses->num_values == 0)
*forecast = value;
else
*forecast = (ses->params.alpha * (ses->last_value - ses->last_forecast)) + ses->last_forecast;
error = value - *forecast;
sq_error = error * error;
ses->sum_square_error += sq_error, ses->prev_error.sum_square_error += sq_error;
if(ses->num_values > 0) {
u_int observations = (ses->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? (ses->num_values + 1) : ((ses->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1);
double sq = sqrt(ses->sum_square_error / observations);
*confidence_band = ses->params.ro * sq;
rc = 1;
} else
*confidence_band = 0, rc = 0;
ses->num_values++, ses->last_value = value, ses->last_forecast = *forecast;
if(++ses->prev_error.num_values_rollup == MAX_SQUARE_ERROR_ITERATIONS) {
ses->sum_square_error = ses->prev_error.sum_square_error;
ses->prev_error.num_values_rollup = 0, ses->prev_error.sum_square_error = 0;
}
#ifdef SES_DEBUG
printf("[num_values: %u][[error: %.3f][forecast: %.3f][sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n",
ses->num_values, error, *forecast, ses->sum_square_error, sq_error, *confidence_band);
#endif
return(rc);
}
/* *********************************************************** */
void ndpi_ses_reset(struct ndpi_ses_struct *ses) {
ses->prev_error.sum_square_error = 0, ses->prev_error.num_values_rollup = 0;
ses->num_values = 0;
ses->sum_square_error = ses->last_forecast = ses->last_value = 0;
}
/* *********************************************************** */
/*
Computes the best alpha value using the specified values used for training
*/
void ndpi_ses_fitting(double *values, u_int32_t num_values, float *ret_alpha) {
u_int i;
float alpha, best_alpha;
double sse, lowest_sse;
if(!values || num_values == 0) {
*ret_alpha = 0;
return;
}
lowest_sse = 0, best_alpha = 0;
for(alpha=0.1; alpha<0.99; alpha += 0.05) {
struct ndpi_ses_struct ses;
ndpi_ses_init(&ses, alpha, 0.05);
#ifdef SES_DEBUG
printf("\nDouble Exponential Smoothing [alpha: %.2f]\n", alpha);
#endif
sse = 0;
for(i=0; iparams.alpha = alpha;
des->params.beta = beta;
if((significance < 0) || (significance > 1)) significance = 0.05;
des->params.ro = ndpi_normal_cdf_inverse(1 - (significance / 2.));
return(0);
}
/* *********************************************************** */
void ndpi_des_reset(struct ndpi_des_struct *des) {
des->prev_error.sum_square_error = 0, des->prev_error.num_values_rollup = 0;
des->num_values = 0;
des->sum_square_error = des->last_forecast = des->last_trend = des->last_value = 0;
}
/* *********************************************************** */
/*
Returns the forecast and the band (forecast +/- band are the upper and lower values)
Input
des: Datastructure previously initialized
value The value to add to the measurement
Output
forecast The forecasted value
confidence_band The value +/- on which the value should fall is not an anomaly
Return code
0 Too early: we're still in the learning phase. Output values are zero.
1 Normal processing: forecast and confidence_band are meaningful
*/
int ndpi_des_add_value(struct ndpi_des_struct *des, const double _value, double *forecast, double *confidence_band) {
double value = (double)_value, error, sq_error;
int rc;
if(des->num_values == 0)
*forecast = value, des->last_trend = 0;
else {
*forecast = (des->params.alpha * value) + ((1 - des->params.alpha) * (des->last_forecast + des->last_trend));
des->last_trend = (des->params.beta * (*forecast - des->last_forecast)) + ((1 - des->params.beta) * des->last_trend);
}
error = value - *forecast;
sq_error = error * error;
des->sum_square_error += sq_error, des->prev_error.sum_square_error += sq_error;
if(des->num_values > 0) {
u_int observations = (des->num_values < MAX_SQUARE_ERROR_ITERATIONS) ? (des->num_values + 1) : ((des->num_values % MAX_SQUARE_ERROR_ITERATIONS) + MAX_SQUARE_ERROR_ITERATIONS + 1);
double sq = sqrt(des->sum_square_error / observations);
*confidence_band = des->params.ro * sq;
rc = 1;
} else
*confidence_band = 0, rc = 0;
des->num_values++, des->last_value = value, des->last_forecast = *forecast;
if(++des->prev_error.num_values_rollup == MAX_SQUARE_ERROR_ITERATIONS) {
des->sum_square_error = des->prev_error.sum_square_error;
des->prev_error.num_values_rollup = 0, des->prev_error.sum_square_error = 0;
}
#ifdef DES_DEBUG
printf("[num_values: %u][[error: %.3f][forecast: %.3f][trend: %.3f[sqe: %.3f][sq: %.3f][confidence_band: %.3f]\n",
des->num_values, error, *forecast, des->last_trend, des->sum_square_error, sq, *confidence_band);
#endif
return(rc);
}
/* *********************************************************** */
/*
Computes the best alpha and beta values using the specified values used for training
*/
void ndpi_des_fitting(double *values, u_int32_t num_values, float *ret_alpha, float *ret_beta) {
u_int i;
float alpha, best_alpha, best_beta, beta = 0;
double sse, lowest_sse;
if(!values || num_values == 0) {
*ret_alpha = 0;
*ret_beta = 0;
return;
}
lowest_sse = 0, best_alpha = 0, best_beta = 0;
for(beta=0.1; beta<0.99; beta += 0.05) {
for(alpha=0.1; alpha<0.99; alpha += 0.05) {
struct ndpi_des_struct des;
ndpi_des_init(&des, alpha, beta, 0.05);
#ifdef DES_DEBUG
printf("\nDouble Exponential Smoothing [alpha: %.2f][beta: %.2f]\n", alpha, beta);
#endif
sse = 0;
for(i=0; i high_threshold)) ? true : false;
if(is_outlier) ret++;
outliers[i] = is_outlier;
}
ndpi_free_data_analysis(&a, 0);
return(ret);
}
/* *********************************************************** */
/* Check if the specified value is an outlier with respect to the past values */
bool ndpi_is_outlier(u_int32_t *past_values, u_int32_t num_past_values,
u_int32_t value_to_check, float threshold,
float *lower, float *upper) {
struct ndpi_analyze_struct *data = ndpi_alloc_data_analysis_from_series(past_values, num_past_values);
float mean, stddev, v;
if(!data) return(false);
mean = ndpi_data_mean(data);
stddev = ndpi_data_stddev(data);
/* The mimimum threshold is 1 (i.e. the value of the stddev) */
if(threshold < 1.) threshold = 1.;
v = threshold * stddev;
*lower = mean - v, *upper = mean + v;
ndpi_free_data_analysis(data, 1 /* free memory */);
return(((value_to_check < *lower) || (value_to_check > *upper)) ? true : false);
}
/* ********************************************************************************* */
/*
Simple Linear regression [https://en.wikipedia.org/wiki/Simple_linear_regression]
https://www.tutorialspoint.com/c-program-to-compute-linear-regression
*/
int ndpi_predict_linear(u_int32_t *values, u_int32_t num_values,
u_int32_t predict_periods, u_int32_t *prediction) {
u_int i;
float m, c, d;
float sumx = 0, sumx_square = 0, sumy = 0, sumxy = 0;
for(i = 0; i < num_values; i++) {
float y = values[i];
float x = i + 1;
sumx = sumx+x;
sumx_square = sumx_square + (x * x);
sumy = sumy + y;
sumxy = sumxy + (x * y);
}
d = (num_values * sumx_square) - (sumx * sumx);
if(d == 0) return(-1);
m = ((num_values * sumxy) - (sumx * sumy)) / d; /* beta */
c = ((sumy * sumx_square) - (sumx * sumxy)) / d; /* alpha */
*prediction = c + (m * (predict_periods + num_values - 1));
return(0);
}
/* ********************************************************************************* */
double ndpi_pearson_correlation(u_int32_t *values_a, u_int32_t *values_b, u_int16_t num_values) {
double sum_a = 0, sum_b = 0, sum_squared_diff_a = 0, sum_squared_diff_b = 0, sum_product_diff = 0;
u_int16_t i;
double mean_a, mean_b, variance_a, variance_b, covariance;
if(num_values == 0) return(0.0);
for(i = 0; i < num_values; i++)
sum_a += values_a[i], sum_b += values_b[i];
mean_a = sum_a / num_values, mean_b = sum_b / num_values;
for(i = 0; i < num_values; i++)
sum_squared_diff_a += pow(values_a[i] - mean_a, 2),
sum_squared_diff_b += pow(values_b[i] - mean_b, 2),
sum_product_diff += (values_a[i] - mean_a) * (values_b[i] - mean_b);
variance_a = sum_squared_diff_a / (double)num_values, variance_b = sum_squared_diff_b / (double)num_values;
covariance = sum_product_diff / (double)num_values;
if(variance_a == 0.0 || variance_b == 0.0)
return(0.0);
return(covariance / sqrt(variance_a * variance_b));
}
/* ********************************************************************************* */
/* ********************************************************************************* */
static const u_int16_t crc16_ccitt_table[256] = {
0x0000, 0x1189, 0x2312, 0x329B, 0x4624, 0x57AD, 0x6536, 0x74BF,
0x8C48, 0x9DC1, 0xAF5A, 0xBED3, 0xCA6C, 0xDBE5, 0xE97E, 0xF8F7,
0x1081, 0x0108, 0x3393, 0x221A, 0x56A5, 0x472C, 0x75B7, 0x643E,
0x9CC9, 0x8D40, 0xBFDB, 0xAE52, 0xDAED, 0xCB64, 0xF9FF, 0xE876,
0x2102, 0x308B, 0x0210, 0x1399, 0x6726, 0x76AF, 0x4434, 0x55BD,
0xAD4A, 0xBCC3, 0x8E58, 0x9FD1, 0xEB6E, 0xFAE7, 0xC87C, 0xD9F5,
0x3183, 0x200A, 0x1291, 0x0318, 0x77A7, 0x662E, 0x54B5, 0x453C,
0xBDCB, 0xAC42, 0x9ED9, 0x8F50, 0xFBEF, 0xEA66, 0xD8FD, 0xC974,
0x4204, 0x538D, 0x6116, 0x709F, 0x0420, 0x15A9, 0x2732, 0x36BB,
0xCE4C, 0xDFC5, 0xED5E, 0xFCD7, 0x8868, 0x99E1, 0xAB7A, 0xBAF3,
0x5285, 0x430C, 0x7197, 0x601E, 0x14A1, 0x0528, 0x37B3, 0x263A,
0xDECD, 0xCF44, 0xFDDF, 0xEC56, 0x98E9, 0x8960, 0xBBFB, 0xAA72,
0x6306, 0x728F, 0x4014, 0x519D, 0x2522, 0x34AB, 0x0630, 0x17B9,
0xEF4E, 0xFEC7, 0xCC5C, 0xDDD5, 0xA96A, 0xB8E3, 0x8A78, 0x9BF1,
0x7387, 0x620E, 0x5095, 0x411C, 0x35A3, 0x242A, 0x16B1, 0x0738,
0xFFCF, 0xEE46, 0xDCDD, 0xCD54, 0xB9EB, 0xA862, 0x9AF9, 0x8B70,
0x8408, 0x9581, 0xA71A, 0xB693, 0xC22C, 0xD3A5, 0xE13E, 0xF0B7,
0x0840, 0x19C9, 0x2B52, 0x3ADB, 0x4E64, 0x5FED, 0x6D76, 0x7CFF,
0x9489, 0x8500, 0xB79B, 0xA612, 0xD2AD, 0xC324, 0xF1BF, 0xE036,
0x18C1, 0x0948, 0x3BD3, 0x2A5A, 0x5EE5, 0x4F6C, 0x7DF7, 0x6C7E,
0xA50A, 0xB483, 0x8618, 0x9791, 0xE32E, 0xF2A7, 0xC03C, 0xD1B5,
0x2942, 0x38CB, 0x0A50, 0x1BD9, 0x6F66, 0x7EEF, 0x4C74, 0x5DFD,
0xB58B, 0xA402, 0x9699, 0x8710, 0xF3AF, 0xE226, 0xD0BD, 0xC134,
0x39C3, 0x284A, 0x1AD1, 0x0B58, 0x7FE7, 0x6E6E, 0x5CF5, 0x4D7C,
0xC60C, 0xD785, 0xE51E, 0xF497, 0x8028, 0x91A1, 0xA33A, 0xB2B3,
0x4A44, 0x5BCD, 0x6956, 0x78DF, 0x0C60, 0x1DE9, 0x2F72, 0x3EFB,
0xD68D, 0xC704, 0xF59F, 0xE416, 0x90A9, 0x8120, 0xB3BB, 0xA232,
0x5AC5, 0x4B4C, 0x79D7, 0x685E, 0x1CE1, 0x0D68, 0x3FF3, 0x2E7A,
0xE70E, 0xF687, 0xC41C, 0xD595, 0xA12A, 0xB0A3, 0x8238, 0x93B1,
0x6B46, 0x7ACF, 0x4854, 0x59DD, 0x2D62, 0x3CEB, 0x0E70, 0x1FF9,
0xF78F, 0xE606, 0xD49D, 0xC514, 0xB1AB, 0xA022, 0x92B9, 0x8330,
0x7BC7, 0x6A4E, 0x58D5, 0x495C, 0x3DE3, 0x2C6A, 0x1EF1, 0x0F78
};
static const u_int16_t crc16_ccitt_false_table[256] = {
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
static inline u_int16_t __crc16(u_int16_t crc, const void *data, size_t n_bytes) {
u_int8_t* b = (u_int8_t*)data;
while (n_bytes--) {
crc = (crc << 8) ^ crc16_ccitt_false_table[(crc >> 8) ^ *b++];
}
return crc;
}
u_int16_t ndpi_crc16_ccit(const void* data, size_t n_bytes) {
u_int16_t crc = 0;
u_int8_t* b = (u_int8_t*)data;
while (n_bytes--) {
crc = (crc >> 8) ^ crc16_ccitt_table[(crc ^ *b++) & 0xFF];
}
return crc;
}
u_int16_t ndpi_crc16_ccit_false(const void *data, size_t n_bytes) {
return __crc16(0xFFFF, data, n_bytes);
}
u_int16_t ndpi_crc16_xmodem(const void *data, size_t n_bytes) {
return __crc16(0, data, n_bytes);
}
u_int16_t ndpi_crc16_x25(const void* data, size_t n_bytes) {
u_int16_t crc = 0xFFFF;
u_int8_t* b = (u_int8_t*)data;
while (n_bytes--) {
crc = (crc >> 8) ^ crc16_ccitt_table[(crc ^ *b++) & 0xFF];
}
return (crc ^ 0xFFFF);
}
/* ********************************************************************************* */
static const u_int32_t crc32_ieee_table[256] =
{
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
u_int32_t ndpi_crc32(const void *data, size_t length, u_int32_t crc)
{
const u_int8_t *p = (const u_int8_t*)data;
crc = ~crc;
while (length--)
{
crc = crc32_ieee_table[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
}
return ~crc;
}
/* ********************************************************************************* */
/*
Count-Min Sketch: Memory Usage
https://florian.github.io/count-min-sketch/
https://medium.com/@nehasingh18.9/count-min-sketch-for-beginners-f1e441bbe7a4
https://sites.google.com/site/countminsketch/code
[Depth: 8][Total memory: 1040]
[Depth: 16][Total memory: 2064]
[Depth: 32][Total memory: 4112]
[Depth: 64][Total memory: 8208]
[Depth: 256][Total memory: 32784]
[Depth: 512][Total memory: 65552]
[Depth: 1024][Total memory: 131088]
[Depth: 2048][Total memory: 262160]
[Depth: 4096][Total memory: 524304]
[Depth: 8192][Total memory: 1048592]
*/
#define NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS 1024
// #define DEBUG
struct ndpi_cm_sketch *ndpi_cm_sketch_init(u_int16_t num_hashes) {
#ifdef DEBUG
u_int32_t tot_mem;
#endif
u_int32_t len;
struct ndpi_cm_sketch *sketch;
len = sizeof(struct ndpi_cm_sketch);
sketch = (struct ndpi_cm_sketch*)ndpi_malloc(len);
if(!sketch)
return(NULL);
#ifdef DEBUG
tot_mem = len;
#endif
if(num_hashes < 2) num_hashes = 2;
num_hashes = ndpi_nearest_power_of_two(num_hashes);
sketch->num_hashes = num_hashes;
sketch->num_hash_buckets = num_hashes * NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS;
sketch->num_hash_buckets = ndpi_nearest_power_of_two(sketch->num_hash_buckets)-1,
len = num_hashes * NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS * sizeof(u_int32_t);
sketch->tables = (u_int32_t*)ndpi_calloc(num_hashes, NDPI_COUNT_MIN_SKETCH_NUM_BUCKETS * sizeof(u_int32_t));
#ifdef DEBUG
tot_mem += len;
#endif
#ifdef DEBUG
printf("[Num_Hashes: %u][Total memory: %u]\n", num_hashes, tot_mem);
#endif
if(!sketch->tables) {
ndpi_free(sketch);
return(NULL);
}
return(sketch);
}
/* ********************************************************************************* */
#define ndpi_simple_hash(value, seed) (value * seed)
/* ********************************************************************************* */
void ndpi_cm_sketch_add(struct ndpi_cm_sketch *sketch, u_int32_t element) {
u_int32_t idx;
for(idx = 1; idx <= sketch->num_hashes; idx++) {
u_int32_t hashval = ndpi_simple_hash(element, idx) & sketch->num_hash_buckets;
sketch->tables[hashval]++;
#ifdef DEBUG
printf("ndpi_add_sketch_add() [hash: %d][num_hash_buckets: %u][hashval: %d][value: %d]\n",
idx, sketch->num_hash_buckets, hashval, sketch->tables[hashval]);
#endif
}
}
/* ********************************************************************************* */
u_int32_t ndpi_cm_sketch_count(struct ndpi_cm_sketch *sketch, u_int32_t element) {
u_int32_t min_value = INT_MAX, idx;
for(idx = 1; idx <= sketch->num_hashes; idx++) {
u_int32_t hashval = ndpi_simple_hash(element, idx) & sketch->num_hash_buckets;
#ifdef DEBUG
printf("ndpi_add_sketch_add() [hash: %d][num_hash_buckets: %u][hashval: %d][value: %d]\n",
idx, sketch->num_hash_buckets, hashval, sketch->tables[hashval]);
#endif
min_value = ndpi_min(min_value, sketch->tables[hashval]);
}
return(min_value);
}
/* ********************************************************************************* */
void ndpi_cm_sketch_destroy(struct ndpi_cm_sketch *sketch) {
ndpi_free(sketch->tables);
ndpi_free(sketch);
}
/* ********************************************************************************* */
/* ********************************************************************************* */
/* Popcount, short for "population count," is a computer programming term that refers to
the number of set bits (bits with a value of 1) in a binary representation of a given
data word or integer. In other words, it is the count of all the 1s present in the
binary representation of a number.
For example, consider the number 45, which is represented in binary as 101101.
The popcount of 45 would be 4 because there are four 1s in its binary representation.
*/
int ndpi_popcount_init(struct ndpi_popcount *h)
{
if(h) {
memset(h, '\0', sizeof(*h));
return 0;
}
return -1;
}
/* ********************************************************************************* */
void ndpi_popcount_count(struct ndpi_popcount *h, const u_int8_t *buf, u_int32_t buf_len)
{
u_int32_t i;
if(!h)
return;
/* Trivial alg. TODO: there are lots of better, more performant algorithms */
for(i = 0; i < buf_len / 4; i++)
h->pop_count += __builtin_popcount(*(u_int32_t *)(buf + i * 4));
for(i = 0; i < buf_len % 4; i++)
h->pop_count += __builtin_popcount(buf[buf_len - (buf_len % 4) + i]);
h->tot_bytes_count += buf_len;
}
/* ********************************************************************************* */
/* ********************************************************************************* */
ndpi_kd_tree* ndpi_kd_create(u_int num_dimensions) { return(kd_create((int)num_dimensions)); }
void ndpi_kd_free(ndpi_kd_tree *tree) { kd_free((struct kdtree *)tree); }
void ndpi_kd_clear(ndpi_kd_tree *tree) { kd_clear((struct kdtree *)tree); }
bool ndpi_kd_insert(ndpi_kd_tree *tree, const double *data_vector, void *user_data) {
return(kd_insert((struct kdtree *)tree, data_vector, user_data) == 0 ? true : false);
}
ndpi_kd_tree_result *ndpi_kd_nearest(ndpi_kd_tree *tree, const double *data_vector) {
return(kd_nearest((struct kdtree *)tree, data_vector));
}
u_int32_t ndpi_kd_num_results(ndpi_kd_tree_result *res) { return((u_int32_t)kd_res_size((struct kdres*)res)); }
double* ndpi_kd_result_get_item(ndpi_kd_tree_result *res, double **user_data) {
return(kd_res_item((struct kdres*)res, user_data));
}
void ndpi_kd_result_free(ndpi_kd_tree_result *res) { kd_res_free((struct kdres *)res); }
double ndpi_kd_distance(double *a1, double *a2, u_int num_dimensions) {
double dist_sq = 0, diff;
u_int i;
for(i=0; i