diff options
author | claudio_burrafato <92789607+claudio4495@users.noreply.github.com> | 2022-06-15 16:36:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-15 16:36:25 +0200 |
commit | c73a0988cd8d15d435b259f5f51f7f6d3a417dbf (patch) | |
tree | 58f3fa51726fe93b2df63826a583ed4959d6105b | |
parent | e2cc08bfe56dcd2738b684c44eb478cea352de4f (diff) |
Add some statistics to ndpiReader (#1587)
* Add some statistics to ndpiReader
The purpose of this version of ndpiReader is to add some further statistics to those printed by ndpiReader. In this simple version, the domain names (in the flows) that are collected are:
flow->ssh_tls.server_info
flow->host_server_name
and are placed in a uthash hash table (via UT_hash_handle), sorted by number of occurrences.
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update example/ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
* Update ndpiReader.c
* Update ndpiReader.c
* Update ndpiReader.c
* Update ndpiReader.c
Co-authored-by: Toni <matzeton@googlemail.com>
-rw-r--r-- | example/ndpiReader.c | 137 |
1 files changed, 136 insertions, 1 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index 171615600..4ec3a528f 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -146,6 +146,14 @@ typedef struct node_a { struct node_a *left, *right; }addr_node; +// struct to add more statitcs in function printFlowStats +typedef struct hash_stats{ + char* domain_name; + int occurency; /* how many time domain name occury in the flow */ + UT_hash_handle hh; /* hashtable to collect the stats */ +}hash_stats; + + struct port_stats { u_int32_t port; /* we'll use this field as the key */ u_int32_t num_pkts, num_bytes; @@ -476,10 +484,11 @@ static void help(u_int long_help) { " | testing purposes in order to compare results across runs\n" " -h | This help\n" " -H | This help plus some information about supported protocols/risks\n" - " -v <1|2|3> | Verbose 'unknown protocol' packet print.\n" + " -v <1|2|3|4> | Verbose 'unknown protocol' packet print.\n" " | 1 = verbose\n" " | 2 = very verbose\n" " | 3 = port stats\n" + " | 4 = hash stats\n" " -V <1-4> | nDPI logging level\n" " | 1 - trace, 2 - debug, 3 - full debug\n" " | >3 - full debug + log enabled for all protocols (i.e. 
'-u all')\n" @@ -2721,6 +2730,26 @@ static void printRiskStats() { /* *********************************************** */ +/*function to use in HASH_SORT function in verbose == 4 to order in creasing order to delete host with the leatest occurency*/ +static int hash_stats_sort_to_order(void *_a, void *_b){ + struct hash_stats *a = (struct hash_stats*)_a; + struct hash_stats *b = (struct hash_stats*)_b; + + return (a->occurency - b->occurency); +} + +/* *********************************************** */ + +/*function to use in HASH_SORT function in verbose == 4 to print in decreasing order*/ +static int hash_stats_sort_to_print(void *_a, void *_b){ + struct hash_stats *a = (struct hash_stats*)_a; + struct hash_stats *b = (struct hash_stats*)_b; + + return (b->occurency - a->occurency); +} + +/* *********************************************** */ + static void printFlowsStats() { int thread_id; u_int32_t total_flows = 0; @@ -3075,6 +3104,112 @@ static void printFlowsStats() { } } + if (verbose == 4){ + //how long the table could be + unsigned int len_table_max = 1000; + //number of element to delete when the table is full + int toDelete = 10; + struct hash_stats *hostsHashT = NULL; + struct hash_stats *host_iter = NULL; + struct hash_stats *tmp = NULL; + int len_max = 0; + + for (i = 0; i<num_flows; i++){ + + if(all_flows[i].flow->host_server_name[0] != '\0'){ + + int len = strlen(all_flows[i].flow->host_server_name); + len_max = ndpi_max(len,len_max); + + struct hash_stats *hostFound; + HASH_FIND_STR(hostsHashT, all_flows[i].flow->host_server_name, hostFound); + + if(hostFound == NULL){ + struct hash_stats *newHost = (struct hash_stats*)ndpi_malloc(sizeof(hash_stats)); + newHost->domain_name = all_flows[i].flow->host_server_name; + newHost->occurency = 1; + if (HASH_COUNT(hostsHashT) == len_table_max) { + int i=0; + while (i<=toDelete){ + + HASH_ITER(hh, hostsHashT, host_iter, tmp){ + HASH_DEL(hostsHashT,host_iter); + free(host_iter); + i++; + } + } + + } + 
HASH_ADD_KEYPTR(hh, hostsHashT, newHost->domain_name, strlen(newHost->domain_name), newHost); + } + else + hostFound->occurency++; + + + } + + if(all_flows[i].flow->ssh_tls.server_info[0] != '\0'){ + + int len = strlen(all_flows[i].flow->host_server_name); + len_max = ndpi_max(len,len_max); + + struct hash_stats *hostFound; + HASH_FIND_STR(hostsHashT, all_flows[i].flow->ssh_tls.server_info, hostFound); + + if(hostFound == NULL){ + struct hash_stats *newHost = (struct hash_stats*)ndpi_malloc(sizeof(hash_stats)); + newHost->domain_name = all_flows[i].flow->ssh_tls.server_info; + newHost->occurency = 1; + + if ((HASH_COUNT(hostsHashT)) == len_table_max) { + int i=0; + while (i<toDelete){ + + HASH_ITER(hh, hostsHashT, host_iter, tmp){ + HASH_DEL(hostsHashT,host_iter); + ndpi_free(host_iter); + i++; + } + } + + + } + HASH_ADD_KEYPTR(hh, hostsHashT, newHost->domain_name, strlen(newHost->domain_name), newHost); + } + else + hostFound->occurency++; + + + } + + //sort the table by the least occurency + HASH_SORT(hostsHashT, hash_stats_sort_to_order); + } + + //sort the table in decreasing order to print + HASH_SORT(hostsHashT, hash_stats_sort_to_print); + + //print the element of the hash table + int j; + HASH_ITER(hh, hostsHashT, host_iter, tmp){ + + printf("\t%s", host_iter->domain_name); + //to print the occurency in aligned column + int diff = len_max-strlen(host_iter->domain_name); + for (j = 0; j <= diff+5;j++) + printf (" "); + printf("%d\n",host_iter->occurency); + } + printf("%s", "\n\n"); + + //freeing the hash table + HASH_ITER(hh, hostsHashT, host_iter, tmp){ + HASH_DEL(hostsHashT, host_iter); + ndpi_free(host_iter); + } + + } + /* Print all flows stats */ qsort(all_flows, num_flows, sizeof(struct flow_info), cmpFlows); |