diff options
author | Ivan Nardi <12729895+IvanNardi@users.noreply.github.com> | 2022-07-29 12:07:41 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-29 12:07:41 +0200 |
commit | bb83899985c25097341b947c2c535f56254a075c (patch) | |
tree | 8f40308c2e4885fc41bd5db5a233cb6956ad7697 /src | |
parent | 95e16872fadfc98256fdcf729f267237c727e4c3 (diff) |
Patricia tree, Aho-Corasick automaton, LRU cache: add statistics (#1677)
Add (basic) internal stats to the main data structures used by the
library; they might be useful for checking how effective these structures
are.
Add an option to `ndpiReader` to dump them; disabled by default to avoid
too much fuss with the unit tests.
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h.in | 32 | ||||
-rw-r--r-- | src/include/ndpi_patricia_typedefs.h | 6 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 58 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 117 | ||||
-rw-r--r-- | src/lib/third_party/include/ahocorasick.h | 7 | ||||
-rw-r--r-- | src/lib/third_party/src/ahocorasick.c | 23 | ||||
-rw-r--r-- | src/lib/third_party/src/ndpi_patricia.c | 6 |
7 files changed, 242 insertions, 7 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in index a069d0571..ab853d402 100644 --- a/src/include/ndpi_api.h.in +++ b/src/include/ndpi_api.h.in @@ -926,6 +926,28 @@ extern "C" { void ndpi_finalize_automa(void *_automa); /** + * Get the automa statistics + * + * @par The automata initialized with ndpi_init_automa(); + * + */ + + void ndpi_automa_get_stats(void *_automa, struct ndpi_automa_stats *stats); + + /** + * Get the statistics of one of the automas used internally by the library + * + * @par ndpi_mod = the detection module + * @par automa_type = of which automa we want the stats + * @par stats = buffer where to save the stats + * @return 0 in case of no error, or -1 if an error occurred. + * + */ + + int ndpi_get_automa_stats(struct ndpi_detection_module_struct *ndpi_struct, + automa_type automa_type, + struct ndpi_automa_stats *stats); + /** * Add a string to match to an automata * * @par The automata initialized with ndpi_init_automa(); @@ -982,6 +1004,11 @@ extern "C" { u_int8_t ndpi_lru_find_cache(struct ndpi_lru_cache *c, u_int32_t key, u_int16_t *value, u_int8_t clean_key_when_found); void ndpi_lru_add_to_cache(struct ndpi_lru_cache *c, u_int32_t key, u_int16_t value); + void ndpi_lru_get_stats(struct ndpi_lru_cache *c, struct ndpi_lru_cache_stats *stats); + + int ndpi_get_lru_cache_stats(struct ndpi_detection_module_struct *ndpi_struct, + lru_cache_type cache_type, + struct ndpi_lru_cache_stats *stats); /** * Find a protocol id associated with a string automata @@ -1096,6 +1123,11 @@ extern "C" { ndpi_prefix_t *ndpi_patricia_get_node_prefix(ndpi_patricia_node_t *node); u_int16_t ndpi_patricia_get_node_bits(ndpi_patricia_node_t *node); u_int16_t ndpi_patricia_get_maxbits(ndpi_patricia_tree_t *tree); + void ndpi_patricia_get_stats(ndpi_patricia_tree_t *tree, struct ndpi_patricia_tree_stats *stats); + + int ndpi_get_patricia_stats(struct ndpi_detection_module_struct *ndpi_struct, + ptree_type ptree_type, + struct 
ndpi_patricia_tree_stats *stats); /* ptree (trie) API - a wrapper on top of Patricia that seamlessly handle IPv4 and IPv6 */ ndpi_ptree_t* ndpi_ptree_create(void); diff --git a/src/include/ndpi_patricia_typedefs.h b/src/include/ndpi_patricia_typedefs.h index c9f7fddac..a75ee416f 100644 --- a/src/include/ndpi_patricia_typedefs.h +++ b/src/include/ndpi_patricia_typedefs.h @@ -99,10 +99,16 @@ typedef struct _ndpi_patricia_node_t { union ndpi_patricia_node_value_t value; } ndpi_patricia_node_t; +struct ndpi_patricia_tree_stats { + u_int64_t n_search; + u_int64_t n_found; +}; + typedef struct _ndpi_patricia_tree_t { ndpi_patricia_node_t *head; u_int16_t maxbits; /* for IP, 32 bit addresses */ int num_active_node; /* for debug purpose */ + struct ndpi_patricia_tree_stats stats; } ndpi_patricia_tree_t; #endif /* _NDPI_PATRICIA_TYPEDEF_H_ */ diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index dde19c488..f5fccfd56 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -595,13 +595,55 @@ typedef enum { NDPI_HTTP_METHOD_RPC_OUT_DATA, } ndpi_http_method; +typedef enum { + NDPI_PTREE_RISK_MASK = 0, + NDPI_PTREE_RISK, + NDPI_PTREE_PROTOCOLS, + + NDPI_PTREE_MAX /* Last one! */ +} ptree_type; + +typedef enum { + NDPI_AUTOMA_HOST = 0, + NDPI_AUTOMA_DOMAIN, + NDPI_AUTOMA_TLS_CERT, + NDPI_AUTOMA_RISK_MASK, + NDPI_AUTOMA_COMMON_ALPNS, + + NDPI_AUTOMA_MAX /* Last one! */ +} automa_type; + +struct ndpi_automa_stats { + u_int64_t n_search; + u_int64_t n_found; +}; + +typedef enum { + NDPI_LRUCACHE_OOKLA = 0, + NDPI_LRUCACHE_BITTORRENT, + NDPI_LRUCACHE_ZOOM, + NDPI_LRUCACHE_STUN, + NDPI_LRUCACHE_TLS_CERT, + NDPI_LRUCACHE_MINING, + NDPI_LRUCACHE_MSTEAMS, + + NDPI_LRUCACHE_MAX /* Last one! 
*/ +} lru_cache_type; + struct ndpi_lru_cache_entry { u_int32_t key; /* Store the whole key to avoid ambiguities */ u_int32_t is_full:1, value:16, pad:15; }; +struct ndpi_lru_cache_stats { + u_int64_t n_insert; + u_int64_t n_search; + u_int64_t n_found; +}; + struct ndpi_lru_cache { u_int32_t num_entries; + struct ndpi_lru_cache_stats stats; struct ndpi_lru_cache_entry *entries; }; @@ -1013,6 +1055,7 @@ typedef struct ndpi_default_ports_tree_node { typedef struct _ndpi_automa { void *ac_automa; /* Real type is AC_AUTOMATA_t */ + struct ndpi_automa_stats stats; } ndpi_automa; typedef struct ndpi_str_hash { @@ -1111,23 +1154,28 @@ struct ndpi_detection_module_struct { ndpi_automa host_automa, /* Used for DNS/HTTPS */ risky_domain_automa, tls_cert_subject_automa, host_risk_mask_automa, common_alpns_automa; + /* IMPORTANT: please, whenever you add a new automa: + * update ndpi_finalize_initialization() + * update automa_type above + */ + ndpi_str_hash *malicious_ja3_hashmap, *malicious_sha1_hashmap; - /* IMPORTANT: please update ndpi_finalize_initialization() whenever you add a new automa */ ndpi_list *trusted_issuer_dn; void *ip_risk_mask_ptree; void *ip_risk_ptree; + /* IP-based protocol detection */ + void *protocols_ptree; + /* *** If you add a new Patricia tree, please update ptree_type above! *** */ + struct { ndpi_automa hostnames, hostnames_shadow; void *ipAddresses, *ipAddresses_shadow; /* Patricia */ u_int8_t categories_loaded; } custom_categories; - /* IP-based protocol detection */ - void *protocols_ptree; - u_int8_t ip_version_limit; /* NDPI_PROTOCOL_OOKLA */ @@ -1154,6 +1202,8 @@ struct ndpi_detection_module_struct { /* NDPI_PROTOCOL_MSTEAMS */ struct ndpi_lru_cache *msteams_cache; + /* *** If you add a new LRU cache, please update lru_cache_type above! 
*** */ + ndpi_proto_defaults_t proto_defaults[NDPI_MAX_SUPPORTED_PROTOCOLS+NDPI_MAX_NUM_CUSTOM_PROTOCOLS]; u_int8_t direction_detect_disable:1, /* disable internal detection of packet direction */ _pad:7; diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 875e6e755..8835a0643 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -2039,6 +2039,38 @@ u_int16_t ndpi_patricia_get_maxbits(ndpi_patricia_tree_t *tree) { /* ******************************************************************** */ +void ndpi_patricia_get_stats(ndpi_patricia_tree_t *tree, struct ndpi_patricia_tree_stats *stats) { + if(tree) { + stats->n_search = tree->stats.n_search; + stats->n_found = tree->stats.n_found; + } else { + stats->n_search = 0; + stats->n_found = 0; + } +} + +/* ******************************************************************** */ + +int ndpi_get_patricia_stats(struct ndpi_detection_module_struct *ndpi_struct, + ptree_type ptree_type, + struct ndpi_patricia_tree_stats *stats) { + switch(ptree_type) { + case NDPI_PTREE_RISK_MASK: + ndpi_patricia_get_stats((ndpi_patricia_tree_t *)ndpi_struct->ip_risk_mask_ptree, stats); + return 0; + case NDPI_PTREE_RISK: + ndpi_patricia_get_stats((ndpi_patricia_tree_t *)ndpi_struct->ip_risk_ptree, stats); + return 0; + case NDPI_PTREE_PROTOCOLS: + ndpi_patricia_get_stats((ndpi_patricia_tree_t *)ndpi_struct->protocols_ptree, stats); + return 0; + default: + return -1; + } +} + +/* ****************************************************** */ + int ndpi_fill_prefix_v4(ndpi_prefix_t *p, const struct in_addr *a, int b, int mb) { if(b < 0 || b > mb) return(-1); @@ -2841,6 +2873,43 @@ void ndpi_finalize_automa(void *_automa) { /* ****************************************************** */ +void ndpi_automa_get_stats(void *_automa, struct ndpi_automa_stats *stats) { + struct ac_stats ac_stats; + + ac_automata_get_stats((AC_AUTOMATA_t *) _automa, &ac_stats); + stats->n_search = ac_stats.n_search; + stats->n_found = ac_stats.n_found; +} + +/* 
****************************************************** */ + +int ndpi_get_automa_stats(struct ndpi_detection_module_struct *ndpi_struct, + automa_type automa_type, + struct ndpi_automa_stats *stats) +{ + switch(automa_type) { + case NDPI_AUTOMA_HOST: + ndpi_automa_get_stats(ndpi_struct->host_automa.ac_automa, stats); + return 0; + case NDPI_AUTOMA_DOMAIN: + ndpi_automa_get_stats(ndpi_struct->risky_domain_automa.ac_automa, stats); + return 0; + case NDPI_AUTOMA_TLS_CERT: + ndpi_automa_get_stats(ndpi_struct->tls_cert_subject_automa.ac_automa, stats); + return 0; + case NDPI_AUTOMA_RISK_MASK: + ndpi_automa_get_stats(ndpi_struct->host_risk_mask_automa.ac_automa, stats); + return 0; + case NDPI_AUTOMA_COMMON_ALPNS: + ndpi_automa_get_stats(ndpi_struct->common_alpns_automa.ac_automa, stats); + return 0; + default: + return -1; + } +} + +/* ****************************************************** */ + static int ndpi_match_string_common(AC_AUTOMATA_t *automa, char *string_to_match,size_t string_len, u_int32_t *protocol_id, ndpi_protocol_category_t *category, ndpi_protocol_breed_t *breed) { @@ -8053,7 +8122,7 @@ void ndpi_set_log_level(struct ndpi_detection_module_struct *ndpi_str, u_int l){ /* LRU cache */ struct ndpi_lru_cache *ndpi_lru_cache_init(u_int32_t num_entries) { - struct ndpi_lru_cache *c = (struct ndpi_lru_cache *) ndpi_malloc(sizeof(struct ndpi_lru_cache)); + struct ndpi_lru_cache *c = (struct ndpi_lru_cache *) ndpi_calloc(1, sizeof(struct ndpi_lru_cache)); if(!c) return(NULL); @@ -8078,10 +8147,12 @@ u_int8_t ndpi_lru_find_cache(struct ndpi_lru_cache *c, u_int32_t key, u_int16_t *value, u_int8_t clean_key_when_found) { u_int32_t slot = key % c->num_entries; + c->stats.n_search++; if(c->entries[slot].is_full && c->entries[slot].key == key) { *value = c->entries[slot].value; if(clean_key_when_found) c->entries[slot].is_full = 0; + c->stats.n_found++; return(1); } else return(0); @@ -8090,9 +8161,53 @@ u_int8_t ndpi_lru_find_cache(struct ndpi_lru_cache *c, 
u_int32_t key, void ndpi_lru_add_to_cache(struct ndpi_lru_cache *c, u_int32_t key, u_int16_t value) { u_int32_t slot = key % c->num_entries; + c->stats.n_insert++; c->entries[slot].is_full = 1, c->entries[slot].key = key, c->entries[slot].value = value; } +void ndpi_lru_get_stats(struct ndpi_lru_cache *c, struct ndpi_lru_cache_stats *stats) { + if(c) { + stats->n_insert = c->stats.n_insert; + stats->n_search = c->stats.n_search; + stats->n_found = c->stats.n_found; + } else { + stats->n_insert = 0; + stats->n_search = 0; + stats->n_found = 0; + } +} + +int ndpi_get_lru_cache_stats(struct ndpi_detection_module_struct *ndpi_struct, + lru_cache_type cache_type, + struct ndpi_lru_cache_stats *stats) +{ + switch(cache_type) { + case NDPI_LRUCACHE_OOKLA: + ndpi_lru_get_stats(ndpi_struct->ookla_cache, stats); + return 0; + case NDPI_LRUCACHE_BITTORRENT: + ndpi_lru_get_stats(ndpi_struct->bittorrent_cache, stats); + return 0; + case NDPI_LRUCACHE_ZOOM: + ndpi_lru_get_stats(ndpi_struct->zoom_cache, stats); + return 0; + case NDPI_LRUCACHE_STUN: + ndpi_lru_get_stats(ndpi_struct->stun_cache, stats); + return 0; + case NDPI_LRUCACHE_TLS_CERT: + ndpi_lru_get_stats(ndpi_struct->tls_cert_cache, stats); + return 0; + case NDPI_LRUCACHE_MINING: + ndpi_lru_get_stats(ndpi_struct->mining_cache, stats); + return 0; + case NDPI_LRUCACHE_MSTEAMS: + ndpi_lru_get_stats(ndpi_struct->msteams_cache, stats); + return 0; + default: + return -1; + } +} + /* ******************************************************************** */ /* diff --git a/src/lib/third_party/include/ahocorasick.h b/src/lib/third_party/include/ahocorasick.h index e59b71ccf..3eb8fdcf7 100644 --- a/src/lib/third_party/include/ahocorasick.h +++ b/src/lib/third_party/include/ahocorasick.h @@ -212,6 +212,11 @@ struct ac_path { unsigned short int idx,l; }; +struct ac_stats { + uint64_t n_search; + uint64_t n_found; +}; + typedef struct { /* The root of the Aho-Corasick trie */ @@ -236,6 +241,7 @@ typedef struct int add_to_range; /* 
for convert to range */ int n_oc,n_range,n_find; /* statistics */ char name[32]; /* if debug != 0 */ + struct ac_stats stats; } AC_AUTOMATA_t; typedef AC_ERROR_t (*NODE_CALLBACK_f)(AC_AUTOMATA_t *, AC_NODE_t *,int idx, void *); @@ -266,4 +272,5 @@ void ac_automata_enable_debug (int debug); /* See man open_memstream() for get result as string */ void ac_automata_dump (AC_AUTOMATA_t * thiz, FILE *); #endif +void ac_automata_get_stats(AC_AUTOMATA_t * thiz, struct ac_stats *stats); #endif diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c index 8b0d7ca0b..6f542ed77 100644 --- a/src/lib/third_party/src/ahocorasick.c +++ b/src/lib/third_party/src/ahocorasick.c @@ -434,6 +434,8 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, AC_NODE_t *next; AC_ALPHABET_t *apos; + thiz->stats.n_search++; + if(thiz->automata_open) /* you must call ac_automata_locate_failure() first */ return -1; @@ -493,15 +495,20 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, match->position = position; match->match_num = curr->matched_patterns->num; match->patterns = curr->matched_patterns->patterns; - if (thiz->match_handler(match, txt, param)) + if (thiz->match_handler(match, txt, param)) { + thiz->stats.n_found++; return 1; + } } } /* match->match_map */ } } } - if(thiz->match_handler) + if(thiz->match_handler) { + if(match->match_counter > 0) + thiz->stats.n_found++; return match->match_counter > 0 ? 
1:0; + } for(i = 0; i < 4; i++) if(txt->match.matched[i]) { @@ -516,6 +523,7 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, pattern->rep.number); } #endif + thiz->stats.n_found++; return 1; } return 0; @@ -1238,5 +1246,16 @@ static inline void node_sort_edges (AC_NODE_t * thiz) } } +void ac_automata_get_stats(AC_AUTOMATA_t * thiz, struct ac_stats *stats) +{ + if (thiz) { + stats->n_search = thiz->stats.n_search; + stats->n_found = thiz->stats.n_found; + } else { + stats->n_search = 0; + stats->n_found = 0; + } +} + /* vim: set ts=4 sw=4 et : */ diff --git a/src/lib/third_party/src/ndpi_patricia.c b/src/lib/third_party/src/ndpi_patricia.c index 01138d681..3da6836a5 100644 --- a/src/lib/third_party/src/ndpi_patricia.c +++ b/src/lib/third_party/src/ndpi_patricia.c @@ -462,6 +462,8 @@ ndpi_patricia_search_exact (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi assert (prefix); assert (prefix->bitlen <= patricia->maxbits); + patricia->stats.n_search++; + if(patricia->head == NULL) return (NULL); @@ -517,6 +519,7 @@ ndpi_patricia_search_exact (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi fprintf (stderr, "patricia_search_exact: found %s/%d\n", ndpi_prefix_toa (node->prefix), node->prefix->bitlen); #endif /* PATRICIA_DEBUG */ + patricia->stats.n_found++; return (node); } return (NULL); @@ -537,6 +540,8 @@ ndpi_patricia_search_best2 (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi assert (prefix); assert (prefix->bitlen <= patricia->maxbits); + patricia->stats.n_search++; + if(patricia->head == NULL) return (NULL); @@ -614,6 +619,7 @@ ndpi_patricia_search_best2 (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi fprintf (stderr, "patricia_search_best: found %s/%d\n", ndpi_prefix_toa (node->prefix), node->prefix->bitlen); #endif /* PATRICIA_DEBUG */ + patricia->stats.n_found++; return (node); } } |