aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2020-06-29 19:09:38 +0200
committerLuca Deri <deri@ntop.org>2020-06-29 19:09:38 +0200
commit2c263bc726c0a13302134b2b40e3677c9667e3f2 (patch)
tree2f98b9627f30748cd453ef4b136542b91dab670d /src
parent93a4a4387af951a4680061bf4e1effa87fa65406 (diff)
Added ndpi_bin_similarity() for computing bin similarity
Diffstat (limited to 'src')
-rw-r--r--src/include/ndpi_api.h.in2
-rw-r--r--src/lib/ndpi_analyze.c47
2 files changed, 48 insertions, 1 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in
index 9640d3d8b..26d7d2f66 100644
--- a/src/include/ndpi_api.h.in
+++ b/src/include/ndpi_api.h.in
@@ -1071,7 +1071,7 @@ extern "C" {
void ndpi_free_bin(struct ndpi_bin *b);
void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id);
void ndpi_normalize_bin(struct ndpi_bin *b);
-
+ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first);
#ifdef __cplusplus
}
#endif
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index e1f37cc8d..5c3460eaf 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -260,6 +260,8 @@ int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int8_t num_bins)
return(0);
}
+/* ********************************************************************************* */
+
void ndpi_free_bin(struct ndpi_bin *b) {
switch(b->family) {
case ndpi_bin_family8:
@@ -274,6 +276,8 @@ void ndpi_free_bin(struct ndpi_bin *b) {
}
}
+/* ********************************************************************************* */
+
void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) {
if(slot_id >= b->num_bins) slot_id = 0;
@@ -292,6 +296,8 @@ void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) {
}
}
+/* ********************************************************************************* */
+
/*
Each bin slot is transformed into a percentage of the total value
*/
@@ -316,3 +322,44 @@ void ndpi_normalize_bin(struct ndpi_bin *b) {
}
}
+/* ********************************************************************************* */
+
+/*
+  Determines how similar two bins are by computing the cosine similarity
+  of their slot values.
+
+  0 = Very different
+  ... (gray zone)
+  1 = Alike
+
+  Parameters:
+   b1, b2           the two bins to compare
+   normalize_first  when non-zero, ndpi_normalize_bin() is called on both
+                    bins before comparing (this is applied to the bins
+                    passed by the caller, not to copies)
+
+  Returns 0 when a pointer is NULL, when either bin has num_incs == 0,
+  when the bins differ in family or number of slots, or when either bin
+  has zero magnitude (the cosine is undefined in that case).
+
+  See https://en.wikipedia.org/wiki/Cosine_similarity for more details
+*/
+float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first) {
+  u_int32_t i;
+  /*
+    Accumulate in double: 32 bit integer sums wrap as soon as squared
+    16/32 bit slot values are added, and the sumxx * sumyy product wraps
+    even earlier.
+  */
+  double sumxx = 0, sumxy = 0, sumyy = 0, denom;
+
+  if((!b1) || (!b2))
+    return(0);
+
+  if((b1->num_incs == 0) || (b2->num_incs == 0)
+     || (b1->family != b2->family) || (b1->num_bins != b2->num_bins))
+    return(0);
+
+  if(normalize_first) {
+    ndpi_normalize_bin(b1);
+    ndpi_normalize_bin(b2);
+  }
+
+  switch(b1->family) {
+  case ndpi_bin_family8:
+    for(i=0; i<b1->num_bins; i++) {
+      double x = (double)b1->u.bins8[i], y = (double)b2->u.bins8[i];
+
+      sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    }
+    break;
+  case ndpi_bin_family16:
+    for(i=0; i<b1->num_bins; i++) {
+      /* Converting before multiplying avoids the signed int overflow of u_int16_t * u_int16_t */
+      double x = (double)b1->u.bins16[i], y = (double)b2->u.bins16[i];
+
+      sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    }
+    break;
+  case ndpi_bin_family32:
+    for(i=0; i<b1->num_bins; i++) {
+      double x = (double)b1->u.bins32[i], y = (double)b2->u.bins32[i];
+
+      sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    }
+    break;
+  default:
+    /* Unknown family: the sums stay at 0 and the check below returns 0 */
+    break;
+  }
+
+  denom = sqrt(sumxx * sumyy);
+
+  /* An all-zero bin has no direction: return 0 instead of dividing 0 by 0 (NaN) */
+  if(denom == 0)
+    return(0);
+
+  return((float)(sumxy / denom));
+}
+
+/* ********************************************************************************* */