aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2023-12-27 22:42:37 +0100
committerLuca Deri <deri@ntop.org>2023-12-27 22:42:37 +0100
commit1366518bff88210008159cc385f092bd5cfc6252 (patch)
tree2a4db00c945c178a5636fb39d6d0e6fc0ea30727
parent99d48383286fbb865ab58db5e5f768d8ed14f41e (diff)
Implements ndpi_pearson_correlation for measuring how correlated two series are
-rw-r--r--example/ndpiReader.c13
-rw-r--r--src/include/ndpi_api.h47
-rw-r--r--src/lib/ndpi_analyze.c39
-rw-r--r--src/lib/ndpi_classify.c24
4 files changed, 108 insertions, 15 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index e3070a47f..91463bb02 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -5488,6 +5488,18 @@ void binaryBitmapUnitTest() {
/* *********************************************** */
+/*
+ * Unit test for ndpi_pearson_correlation().
+ *
+ * data_b mostly decreases while data_a increases, so the expected
+ * coefficient is negative (about -0.711 for these values).
+ */
+void pearsonUnitTest() {
+  u_int32_t data_a[] = {1, 2, 3, 4, 5};
+  u_int32_t data_b[] = {1000, 113, 104, 105, 106};
+  u_int16_t num = sizeof(data_a) / sizeof(data_a[0]);
+  /* Keep the full double precision returned by the API */
+  double pearson = ndpi_pearson_correlation(data_a, data_b, num);
+
+  /*
+   * A range check, unlike "!= 0.0", also fails when the result is NaN
+   * or has the wrong sign.
+   */
+  assert((pearson > -0.72) && (pearson < -0.70));
+}
+
+/* *********************************************** */
+
void domainSearchUnitTest() {
ndpi_domain_classify *sc = ndpi_domain_classify_alloc();
char *domain = "ntop.org";
@@ -5576,6 +5588,7 @@ int main(int argc, char **argv) {
exit(0);
#endif
+ pearsonUnitTest();
binaryBitmapUnitTest();
domainSearchUnitTest();
domainSearchUnitTest2();
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 168014c2e..9200fcbf5 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -360,7 +360,8 @@ extern "C" {
* @return the ID of the app protocol detected
*
*/
- u_int16_t ndpi_get_flow_appprotocol(struct ndpi_detection_module_struct *ndpi_str, struct ndpi_flow_struct *flow);
+ u_int16_t ndpi_get_flow_appprotocol(struct ndpi_detection_module_struct *ndpi_str,
+ struct ndpi_flow_struct *flow);
/**
* Get the category of the passed flows for the detected module
@@ -371,7 +372,8 @@ extern "C" {
* @return the ID of the category
*
*/
- ndpi_protocol_category_t ndpi_get_flow_category(struct ndpi_detection_module_struct *ndpi_str, struct ndpi_flow_struct *flow);
+ ndpi_protocol_category_t ndpi_get_flow_category(struct ndpi_detection_module_struct *ndpi_str,
+ struct ndpi_flow_struct *flow);
/**
* Get the ndpi protocol data of the passed flows for the detected module
@@ -382,8 +384,9 @@ extern "C" {
* @par ndpi_proto = the output struct where to store the requested information
*
*/
- void ndpi_get_flow_ndpi_proto(struct ndpi_detection_module_struct *ndpi_str, struct ndpi_flow_struct *flow,
- struct ndpi_proto * ndpi_proto);
+ void ndpi_get_flow_ndpi_proto(struct ndpi_detection_module_struct *ndpi_str,
+ struct ndpi_flow_struct *flow,
+ struct ndpi_proto * ndpi_proto);
/**
* API call that is called internally by ndpi_detection_process_packet or by apps
@@ -415,7 +418,8 @@ extern "C" {
else != 0
*
*/
- u_int8_t ndpi_detection_get_l4(const u_int8_t *l3, u_int16_t l3_len, const u_int8_t **l4_return, u_int16_t *l4_len_return,
+ u_int8_t ndpi_detection_get_l4(const u_int8_t *l3, u_int16_t l3_len,
+ const u_int8_t **l4_return, u_int16_t *l4_len_return,
u_int8_t *l4_protocol_return, u_int32_t flags);
/**
@@ -430,10 +434,8 @@ extern "C" {
*
*/
ndpi_protocol ndpi_find_port_based_protocol(struct ndpi_detection_module_struct *ndpi_struct/* , u_int8_t proto */,
- u_int32_t shost,
- u_int16_t sport,
- u_int32_t dhost,
- u_int16_t dport);
+ u_int32_t shost, u_int16_t sport,
+ u_int32_t dhost, u_int16_t dport);
/**
* Search and return the protocol guessed that is undetected
*
@@ -463,10 +465,8 @@ extern "C" {
ndpi_protocol ndpi_guess_undetected_protocol_v4(struct ndpi_detection_module_struct *ndpi_struct,
struct ndpi_flow_struct *flow,
u_int8_t proto,
- u_int32_t shost,
- u_int16_t sport,
- u_int32_t dhost,
- u_int16_t dport);
+ u_int32_t shost, u_int16_t sport,
+ u_int32_t dhost, u_int16_t dport);
/**
* Check if the string passed match with a protocol
*
@@ -1742,6 +1742,7 @@ extern "C" {
/* Data analysis */
struct ndpi_analyze_struct* ndpi_alloc_data_analysis(u_int16_t _max_series_len);
+ struct ndpi_analyze_struct* ndpi_alloc_data_analysis_from_series(const u_int32_t *values, u_int16_t num_values);
void ndpi_init_data_analysis(struct ndpi_analyze_struct *s, u_int16_t _max_series_len);
void ndpi_free_data_analysis(struct ndpi_analyze_struct *d, u_int8_t free_pointer);
void ndpi_reset_data_analysis(struct ndpi_analyze_struct *d);
@@ -1928,6 +1929,26 @@ extern "C" {
/* ******************************* */
+ /*
+ * Measures how strongly two series are linearly correlated using the
+ * Pearson correlation coefficient, which is a value in the -1..0..+1 range
+ * where:
+ * -1 <= x < 0 Negative correlation (when one series changes, the other changes in the opposite direction)
+ * x = 0 No correlation (no linear relationship between the series)
+ * 0 < x <= 1 Positive correlation (when one series changes, the other changes in the same direction,
+ * i.e. when one series increases, the other also increases and vice-versa)
+ *
+ * @par values_a = First series with num_values values
+ * @par values_b = Second series with num_values values
+ * @par num_values = Number of series entries
+ *
+ * @return pearson correlation coefficient
+ *
+ */
+ double ndpi_pearson_correlation(u_int32_t *values_a, u_int32_t *values_b, u_int16_t num_values);
+
+ /* ******************************* */
+
u_int32_t ndpi_quick_16_byte_hash(u_int8_t *in_16_bytes_long);
/* ******************************* */
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index bb0b74fd4..7a2fd495c 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -66,6 +66,20 @@ struct ndpi_analyze_struct* ndpi_alloc_data_analysis(u_int16_t _max_series_len)
/* ********************************************************************************* */
+/*
+ * Builds a data analysis structure from an existing series: the structure is
+ * obtained from ndpi_alloc_data_analysis(num_values) and then fed with the
+ * num_values entries of values, in order, through ndpi_data_add_value().
+ *
+ * Returns NULL when ndpi_alloc_data_analysis() returns NULL.
+ * NOTE(review): presumably the result is released with
+ * ndpi_free_data_analysis() - confirm against callers.
+ */
+struct ndpi_analyze_struct* ndpi_alloc_data_analysis_from_series(const u_int32_t *values, u_int16_t num_values) {
+  struct ndpi_analyze_struct *analysis = ndpi_alloc_data_analysis(num_values);
+  u_int16_t idx = 0;
+
+  if(analysis != NULL) {
+    while(idx < num_values) {
+      ndpi_data_add_value(analysis, (const u_int64_t)values[idx]);
+      idx++;
+    }
+  }
+
+  return(analysis);
+}
+
+/* ********************************************************************************* */
+
void ndpi_free_data_analysis(struct ndpi_analyze_struct *d, u_int8_t free_pointer) {
if(d && d->values) ndpi_free(d->values);
if(free_pointer) ndpi_free(d);
@@ -1670,6 +1684,31 @@ int ndpi_predict_linear(u_int32_t *values, u_int32_t num_values,
/* ********************************************************************************* */
+/*
+ * Computes the Pearson correlation coefficient of two series of equal length.
+ *
+ * @par values_a   = First series with num_values values
+ * @par values_b   = Second series with num_values values
+ * @par num_values = Number of entries in each series
+ *
+ * @return the coefficient, or 0.0 (reported as "no correlation") whenever it
+ *         is undefined: a series pointer is NULL, the series are empty, or
+ *         at least one of the series is constant (zero variance).
+ */
+double ndpi_pearson_correlation(u_int32_t *values_a, u_int32_t *values_b, u_int16_t num_values) {
+  double sum_a = 0, sum_b = 0, sum_squared_diff_a = 0, sum_squared_diff_b = 0, sum_product_diff = 0;
+  double mean_a, mean_b, variance_a, variance_b, covariance, denominator;
+  u_int16_t i;
+
+  if((values_a == NULL) || (values_b == NULL) || (num_values == 0))
+    return(0.0);
+
+  for(i = 0; i < num_values; i++) {
+    sum_a += values_a[i];
+    sum_b += values_b[i];
+  }
+
+  mean_a = sum_a / num_values;
+  mean_b = sum_b / num_values;
+
+  for(i = 0; i < num_values; i++) {
+    double diff_a = values_a[i] - mean_a;
+    double diff_b = values_b[i] - mean_b;
+
+    sum_squared_diff_a += diff_a * diff_a;
+    sum_squared_diff_b += diff_b * diff_b;
+    sum_product_diff   += diff_a * diff_b;
+  }
+
+  variance_a = sum_squared_diff_a / (double)num_values;
+  variance_b = sum_squared_diff_b / (double)num_values;
+  covariance = sum_product_diff / (double)num_values;
+
+  denominator = sqrt(variance_a * variance_b);
+
+  /*
+   * A constant series has zero variance: dividing would compute 0/0 and
+   * return NaN, which is outside the documented -1..+1 range.
+   */
+  if(denominator == 0.0)
+    return(0.0);
+
+  return(covariance / denominator);
+}
+
+/* ********************************************************************************* */
+
static const u_int16_t crc16_ccitt_table[256] = {
0x0000, 0x1189, 0x2312, 0x329B, 0x4624, 0x57AD, 0x6536, 0x74BF,
0x8C48, 0x9DC1, 0xAF5A, 0xBED3, 0xCA6C, 0xDBE5, 0xE97E, 0xF8F7,
diff --git a/src/lib/ndpi_classify.c b/src/lib/ndpi_classify.c
index a7937ed08..b8bf8213d 100644
--- a/src/lib/ndpi_classify.c
+++ b/src/lib/ndpi_classify.c
@@ -61,6 +61,7 @@
_a < _b ? _a : _b; })
#endif
+/* **************************************** */
//bias (1) + w (207)
//const float ndpi_parameters_splt[NUM_PARAMETERS_SPLT_LOGREG] = {
@@ -240,6 +241,8 @@ float ndpi_parameters_bd[NUM_PARAMETERS_BD_LOGREG] = {
0.000000000000000000e+00, 0.000000000000000000e+00, -9.635140703414636576e+00, 2.603288107669730511e+00,
};
+/* **************************************** */
+
/**
* \fn void ndpi_merge_splt_arrays (const uint16_t *pkt_len, const pkt_timeval *pkt_time,
const uint16_t *pkt_len_twin, const pkt_timeval *pkt_time_twin,
@@ -342,6 +345,8 @@ ndpi_merge_splt_arrays (const uint16_t *pkt_len, const pkt_timeval *pkt_time,
merged_times[0] = ndpi_timeval_to_microseconds(start_m);
}
+/* **************************************** */
+
/* transform lens array to Markov chain */
static void
ndpi_get_mc_rep_lens (uint16_t *lens, float *length_mc, uint16_t num_packets)
@@ -382,6 +387,8 @@ ndpi_get_mc_rep_lens (uint16_t *lens, float *length_mc, uint16_t num_packets)
}
}
+/* **************************************** */
+
/* transform times array to Markov chain */
void
ndpi_get_mc_rep_times (uint16_t *times, float *time_mc, uint16_t num_packets)
@@ -421,6 +428,8 @@ ndpi_get_mc_rep_times (uint16_t *times, float *time_mc, uint16_t num_packets)
}
}
+/* **************************************** */
+
/**
* \fn float classify (const unsigned short *pkt_len, const pkt_timeval *pkt_time,
const unsigned short *pkt_len_twin, const pkt_timeval *pkt_time_twin,
@@ -452,8 +461,7 @@ ndpi_classify (const unsigned short *pkt_len, const pkt_timeval *pkt_time,
const unsigned short *pkt_len_twin, const pkt_timeval *pkt_time_twin,
pkt_timeval start_time, pkt_timeval start_time_twin, uint32_t max_num_pkt_len,
uint16_t sp, uint16_t dp, uint32_t op, uint32_t ip, uint32_t np_o, uint32_t np_i,
- uint32_t ob, uint32_t ib, uint16_t use_bd, const uint32_t *bd, const uint32_t *bd_t)
-{
+ uint32_t ob, uint32_t ib, uint16_t use_bd, const uint32_t *bd, const uint32_t *bd_t) {
float features[NUM_PARAMETERS_BD_LOGREG] = {1.0};
float mc_lens[MC_BINS_LEN*MC_BINS_LEN];
@@ -539,6 +547,8 @@ ndpi_classify (const unsigned short *pkt_len, const pkt_timeval *pkt_time,
return 1.0/(1.0+exp(score));
}
+/* **************************************** */
+
/**
* \fn void update_params (char *splt_params, char *bd_params)
* \brief if a user supplies new parameter files, update parameters splt/bd
@@ -592,6 +602,8 @@ ndpi_update_params (classifier_type_codes_t param_type, const char *param_file)
}
}
+/* **************************************** */
+
/* *********************************************************************
* ---------------------------------------------------------------------
* Time functions
@@ -619,6 +631,8 @@ ndpi_timer_eq(const pkt_timeval *a,
return 0;
}
+/* **************************************** */
+
unsigned int
ndpi_timer_lt(const pkt_timeval *a,
const pkt_timeval *b)
@@ -647,6 +661,8 @@ ndpi_timer_sub(const pkt_timeval *a,
}
}
+/* **************************************** */
+
/**
* \brief Zeroize a timeval.
* \param a Timeval to zero out
@@ -658,6 +674,8 @@ ndpi_timer_clear(pkt_timeval *a)
a->tv_sec = a->tv_usec = 0;
}
+/* **************************************** */
+
/**
* \brief Calculate the milliseconds representation of a timeval.
* \param ts Timeval
@@ -671,6 +689,8 @@ ndpi_timeval_to_milliseconds(pkt_timeval ts)
return usec / 1000 + sec * 1000;
}
+/* **************************************** */
+
/**
* \brief Calculate the microseconds representation of a timeval.
* \param ts Timeval