diff options
author | Luca Deri <deri@ntop.org> | 2020-06-29 19:09:38 +0200 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2020-06-29 19:09:38 +0200 |
commit | 2c263bc726c0a13302134b2b40e3677c9667e3f2 (patch) | |
tree | 2f98b9627f30748cd453ef4b136542b91dab670d | |
parent | 93a4a4387af951a4680061bf4e1effa87fa65406 (diff) |
Added ndpi_bin_similarity() for computing bin similarity
-rw-r--r-- | example/ndpiReader.c | 21 | ||||
-rw-r--r-- | src/include/ndpi_api.h.in | 2 | ||||
-rw-r--r-- | src/lib/ndpi_analyze.c | 47 |
3 files changed, 69 insertions, 1 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 2ded09888..6c851179c 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -3110,6 +3110,26 @@ void test_lib() {
 
 /* *********************************************** */
 
+static void binUnitTest() {
+  struct ndpi_bin b1, b2;
+  u_int8_t num_bins = 32;
+  u_int32_t i;
+
+  srand(time(NULL));
+
+  ndpi_init_bin(&b1, ndpi_bin_family8, num_bins), ndpi_init_bin(&b2, ndpi_bin_family8, num_bins);
+
+  for(i=0; i<32; i++)
+    ndpi_inc_bin(&b1, rand() % num_bins), ndpi_inc_bin(&b2, rand() % num_bins);
+
+  ndpi_bin_similarity(&b1, &b1, 0);
+  ndpi_bin_similarity(&b1, &b2, 0);
+
+  ndpi_free_bin(&b1), ndpi_free_bin(&b2);
+}
+
+/* *********************************************** */
+
 static void dgaUnitTest() {
   const char *dga[] = {
     "lbjamwptxz",
@@ -3495,6 +3515,7 @@ int orginal_main(int argc, char **argv) {
     if(ndpi_info_mod == NULL) return -1;
 
     /* Internal checks */
+    binUnitTest();
     dgaUnitTest();
     hllUnitTest();
     bitmapUnitTest();
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in
index 9640d3d8b..26d7d2f66 100644
--- a/src/include/ndpi_api.h.in
+++ b/src/include/ndpi_api.h.in
@@ -1071,7 +1071,7 @@ extern "C" {
   void ndpi_free_bin(struct ndpi_bin *b);
   void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id);
   void ndpi_normalize_bin(struct ndpi_bin *b);
-
+  float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index e1f37cc8d..5c3460eaf 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -260,6 +260,8 @@ int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int8_t num_bins)
   return(0);
 }
 
+/* ********************************************************************************* */
+
 void ndpi_free_bin(struct ndpi_bin *b) {
   switch(b->family) {
   case ndpi_bin_family8:
@@ -274,6 +276,8 @@ void ndpi_free_bin(struct ndpi_bin *b) {
   }
 }
 
+/* ********************************************************************************* */
+
 void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) {
   if(slot_id >= b->num_bins) slot_id = 0;
 
@@ -292,6 +296,8 @@ void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id) {
   }
 }
 
+/* ********************************************************************************* */
+
 /*
   Each bin slot is transformed in a % with respect to the value total
  */
@@ -316,3 +322,44 @@ void ndpi_normalize_bin(struct ndpi_bin *b) {
   }
 }
 
+/* ********************************************************************************* */
+
+/*
+  Determines how similar two bins are (cosine similarity of their slots)
+
+  0 = Very different (also returned if a bin is empty/all-zero or the bins mismatch)
+  ... (gray zone)
+  1 = Alike
+  If normalize_first != 0, b1 and b2 are passed to ndpi_normalize_bin() first
+  See https://en.wikipedia.org/wiki/Cosine_similarity for more details
+*/
+float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t normalize_first) {
+  u_int8_t i;
+  double x, y, sumxx = 0, sumxy = 0, sumyy = 0; /* double: 32 bit integer products/sums overflow */
+
+  if((b1->num_incs == 0) || (b2->num_incs == 0)
+     || (b1->family != b2->family) || (b1->num_bins != b2->num_bins))
+    return(0);
+
+  if(normalize_first) /* NOTE: ndpi_normalize_bin() rewrites the slots of the bin it is given */
+    ndpi_normalize_bin(b1), ndpi_normalize_bin(b2);
+
+  switch(b1->family) {
+  case ndpi_bin_family8:
+    for(i=0; i<b1->num_bins; i++)
+      x = b1->u.bins8[i], y = b2->u.bins8[i], sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    break;
+  case ndpi_bin_family16:
+    for(i=0; i<b1->num_bins; i++)
+      x = b1->u.bins16[i], y = b2->u.bins16[i], sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    break;
+  case ndpi_bin_family32:
+    for(i=0; i<b1->num_bins; i++)
+      x = b1->u.bins32[i], y = b2->u.bins32[i], sumxx += x * x, sumyy += y * y, sumxy += x * y;
+    break;
+  }
+
+  return(((sumxx == 0) || (sumyy == 0)) ? 0 : (float)(sumxy / sqrt(sumxx * sumyy))); /* no 0/0 (NaN) */
+}
+
+/* ********************************************************************************* */