diff options
author | Luca Deri <deri@ntop.org> | 2020-06-29 19:09:38 +0200 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2020-06-29 19:09:38 +0200 |
commit | 2c263bc726c0a13302134b2b40e3677c9667e3f2 (patch) | |
tree | 2f98b9627f30748cd453ef4b136542b91dab670d /src | |
parent | 93a4a4387af951a4680061bf4e1effa87fa65406 (diff) |
Added ndpi_bin_similarity() for computing bin similarity
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h.in | 2 | ||||
-rw-r--r-- | src/lib/ndpi_analyze.c | 47 |
2 files changed, 48 insertions, 1 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in index 9640d3d8b..26d7d2f66 100644 --- a/src/include/ndpi_api.h.in +++ b/src/include/ndpi_api.h.in @@ -1071,7 +1071,7 @@ extern "C" { void ndpi_free_bin(struct ndpi_bin *b); void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id); void ndpi_normalize_bin(struct ndpi_bin *b); - + float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first); #ifdef __cplusplus } #endif diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c index e1f37cc8d..5c3460eaf 100644 --- a/src/lib/ndpi_analyze.c +++ b/src/lib/ndpi_analyze.c @@ -260,6 +260,8 @@ int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int8_t num_bins) return(0); } +/* ********************************************************************************* */ + void ndpi_free_bin(struct ndpi_bin *b) { switch(b->family) { case ndpi_bin_family8: @@ -274,6 +276,8 @@ void ndpi_free_bin(struct ndpi_bin *b) { } } +/* ********************************************************************************* */ + void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) { if(slot_id >= b->num_bins) slot_id = 0; @@ -292,6 +296,8 @@ void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) { } } +/* ********************************************************************************* */ + /* Each bin slot is transformed in a % with respect to the value total */ @@ -316,3 +322,44 @@ void ndpi_normalize_bin(struct ndpi_bin *b) { } } +/* ********************************************************************************* */ + +/* + Determines how similar two bins are + + 0 = Very different + ...
(gray zone) + 1 = Alike + + See https://en.wikipedia.org/wiki/Cosine_similarity for more details +*/ +float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first) { + u_int8_t i; + u_int32_t sumxx = 0, sumxy = 0, sumyy = 0; + + if((b1->num_incs == 0) || (b2->num_incs == 0) + || (b1->family != b2->family) || (b1->num_bins != b2->num_bins)) + return(0); + + if(normalize_first) + ndpi_normalize_bin(b1), ndpi_normalize_bin(b2); + + switch(b1->family) { + case ndpi_bin_family8: + for(i=0; i<b1->num_bins; i++) + sumxx += b1->u.bins8[i] * b1->u.bins8[i], sumyy += b2->u.bins8[i] * b2->u.bins8[i], sumxy += b1->u.bins8[i] * b2->u.bins8[i]; + break; + case ndpi_bin_family16: + for(i=0; i<b1->num_bins; i++) + sumxx += b1->u.bins16[i] * b1->u.bins16[i], sumyy += b2->u.bins16[i] * b2->u.bins16[i], sumxy += b1->u.bins16[i] * b2->u.bins16[i]; + break; + case ndpi_bin_family32: + for(i=0; i<b1->num_bins; i++) + sumxx += b1->u.bins32[i] * b1->u.bins32[i], sumyy += b2->u.bins32[i] * b2->u.bins32[i], sumxy += b1->u.bins32[i] * b2->u.bins32[i]; + break; + } + + return((float)sumxy / sqrt((float)(sumxx * sumyy))); +} + +/* ********************************************************************************* */ |