aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- example/ndpiReader.c 21
-rw-r--r-- src/include/ndpi_api.h.in 2
-rw-r--r-- src/lib/ndpi_analyze.c 47
3 files changed, 69 insertions, 1 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 2ded09888..6c851179c 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -3110,6 +3110,26 @@ void test_lib() {
/* *********************************************** */
+/*
+  Exercises the bin API: two bins of family ndpi_bin_family8 with 32 slots
+  each receive 32 increments on randomly chosen slots, then the first bin is
+  compared with itself and with the second one before both are released.
+
+  NOTE: the values returned by ndpi_bin_similarity() are discarded, so this
+  routine only verifies that the calls complete.
+*/
+static void binUnitTest() {
+  u_int8_t slots = 32;
+  struct ndpi_bin first, second;
+  u_int32_t round = 0;
+
+  /* Seed from the clock: the slots hit change at every run */
+  srand(time(NULL));
+
+  ndpi_init_bin(&first, ndpi_bin_family8, slots);
+  ndpi_init_bin(&second, ndpi_bin_family8, slots);
+
+  while(round < 32) {
+    /* Keep this order: one rand() draw for the first bin, then one for the second */
+    ndpi_inc_bin(&first, rand() % slots);
+    ndpi_inc_bin(&second, rand() % slots);
+    round++;
+  }
+
+  /* A bin against itself, then against an independently filled one */
+  ndpi_bin_similarity(&first, &first, 0);
+  ndpi_bin_similarity(&first, &second, 0);
+
+  ndpi_free_bin(&first);
+  ndpi_free_bin(&second);
+}
+
+/* *********************************************** */
+
static void dgaUnitTest() {
const char *dga[] = {
"lbjamwptxz",
@@ -3495,6 +3515,7 @@ int orginal_main(int argc, char **argv) {
if(ndpi_info_mod == NULL) return -1;
/* Internal checks */
+ binUnitTest();
dgaUnitTest();
hllUnitTest();
bitmapUnitTest();
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in
index 9640d3d8b..26d7d2f66 100644
--- a/src/include/ndpi_api.h.in
+++ b/src/include/ndpi_api.h.in
@@ -1071,7 +1071,7 @@ extern "C" {
void ndpi_free_bin(struct ndpi_bin *b);
void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id);
void ndpi_normalize_bin(struct ndpi_bin *b);
-
+ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first);
#ifdef __cplusplus
}
#endif
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index e1f37cc8d..5c3460eaf 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -260,6 +260,8 @@ int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int8_t num_bins)
return(0);
}
+/* ********************************************************************************* */
+
void ndpi_free_bin(struct ndpi_bin *b) {
switch(b->family) {
case ndpi_bin_family8:
@@ -274,6 +276,8 @@ void ndpi_free_bin(struct ndpi_bin *b) {
}
}
+/* ********************************************************************************* */
+
void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) {
if(slot_id >= b->num_bins) slot_id = 0;
@@ -292,6 +296,8 @@ void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) {
}
}
+/* ********************************************************************************* */
+
/*
Each bin slot is transformed in a % with respect to the value total
*/
@@ -316,3 +322,44 @@ void ndpi_normalize_bin(struct ndpi_bin *b) {
}
}
+/* ********************************************************************************* */
+
+/*
+  Determines how similar two bins are by computing the cosine similarity
+  of their counters, each bin being treated as a vector of num_bins values
+
+  0 = Very different
+  ... (gray zone)
+  1 = Alike
+
+  0 is also returned when the bins cannot be compared: no increments
+  recorded in one of them, different family or number of slots, or all
+  the counters of a bin being zero
+
+  If normalize_first is non-zero, ndpi_normalize_bin() is applied to both
+  b1 and b2 before the comparison: the bins passed by the caller are modified
+
+  See https://en.wikipedia.org/wiki/Cosine_similarity for more details
+*/
+float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first) {
+  u_int8_t i;
+  /*
+    Accumulate in floating point: 32 bit integer sums of squares (and, much
+    sooner, their product sumxx * sumyy) wrap around with large counters,
+    and the product of two u_int16_t values is computed as a signed int
+    that can overflow
+  */
+  double sumxx = 0, sumxy = 0, sumyy = 0;
+
+  if((b1->num_incs == 0) || (b2->num_incs == 0)
+     || (b1->family != b2->family) || (b1->num_bins != b2->num_bins))
+    return(0);
+
+  if(normalize_first) {
+    ndpi_normalize_bin(b1);
+    ndpi_normalize_bin(b2);
+  }
+
+  switch(b1->family) {
+  case ndpi_bin_family8:
+    for(i=0; i<b1->num_bins; i++) {
+      double x = b1->u.bins8[i], y = b2->u.bins8[i];
+
+      sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    }
+    break;
+  case ndpi_bin_family16:
+    for(i=0; i<b1->num_bins; i++) {
+      double x = b1->u.bins16[i], y = b2->u.bins16[i];
+
+      sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    }
+    break;
+  case ndpi_bin_family32:
+    for(i=0; i<b1->num_bins; i++) {
+      double x = b1->u.bins32[i], y = b2->u.bins32[i];
+
+      sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    }
+    break;
+  default:
+    /* Unknown family: there are no counters that can be compared */
+    return(0);
+  }
+
+  /* A zero norm would make the division below yield NaN (0/0) */
+  if((sumxx == 0) || (sumyy == 0))
+    return(0);
+
+  return((float)(sumxy / sqrt(sumxx * sumyy)));
+}
+
+/* ********************************************************************************* */