aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIvan Nardi <12729895+IvanNardi@users.noreply.github.com>2022-07-29 12:07:41 +0200
committerGitHub <noreply@github.com>2022-07-29 12:07:41 +0200
commitbb83899985c25097341b947c2c535f56254a075c (patch)
tree8f40308c2e4885fc41bd5db5a233cb6956ad7697 /src
parent95e16872fadfc98256fdcf729f267237c727e4c3 (diff)
Patricia tree, Aho-Corasick automa, LRU cache: add statistics (#1677)
Add (basic) internal stats to the main data structures used by the library; they might be useful to check how effective these structures are. Add an option to `ndpiReader` to dump them; disabled by default to avoid too much fuss with the unit tests.
Diffstat (limited to 'src')
-rw-r--r--src/include/ndpi_api.h.in32
-rw-r--r--src/include/ndpi_patricia_typedefs.h6
-rw-r--r--src/include/ndpi_typedefs.h58
-rw-r--r--src/lib/ndpi_main.c117
-rw-r--r--src/lib/third_party/include/ahocorasick.h7
-rw-r--r--src/lib/third_party/src/ahocorasick.c23
-rw-r--r--src/lib/third_party/src/ndpi_patricia.c6
7 files changed, 242 insertions, 7 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in
index a069d0571..ab853d402 100644
--- a/src/include/ndpi_api.h.in
+++ b/src/include/ndpi_api.h.in
@@ -926,6 +926,28 @@ extern "C" {
void ndpi_finalize_automa(void *_automa);
/**
+ * Get the automa statistics
+ *
+ * @par The automata initialized with ndpi_init_automa();
+ *
+ */
+
+ void ndpi_automa_get_stats(void *_automa, struct ndpi_automa_stats *stats);
+
+ /**
+ * Get the statistics of one of the automas used internally by the library
+ *
+ * @par ndpi_struct = the detection module
+ * @par automa_type = of which automa we want the stats
+ * @par stats = buffer where to save the stats
+ * @return 0 in case of no error, or -1 if an error occurred.
+ *
+ */
+
+ int ndpi_get_automa_stats(struct ndpi_detection_module_struct *ndpi_struct,
+ automa_type automa_type,
+ struct ndpi_automa_stats *stats);
+ /**
* Add a string to match to an automata
*
* @par The automata initialized with ndpi_init_automa();
@@ -982,6 +1004,11 @@ extern "C" {
u_int8_t ndpi_lru_find_cache(struct ndpi_lru_cache *c, u_int32_t key,
u_int16_t *value, u_int8_t clean_key_when_found);
void ndpi_lru_add_to_cache(struct ndpi_lru_cache *c, u_int32_t key, u_int16_t value);
+ void ndpi_lru_get_stats(struct ndpi_lru_cache *c, struct ndpi_lru_cache_stats *stats);
+
+ int ndpi_get_lru_cache_stats(struct ndpi_detection_module_struct *ndpi_struct,
+ lru_cache_type cache_type,
+ struct ndpi_lru_cache_stats *stats);
/**
* Find a protocol id associated with a string automata
@@ -1096,6 +1123,11 @@ extern "C" {
ndpi_prefix_t *ndpi_patricia_get_node_prefix(ndpi_patricia_node_t *node);
u_int16_t ndpi_patricia_get_node_bits(ndpi_patricia_node_t *node);
u_int16_t ndpi_patricia_get_maxbits(ndpi_patricia_tree_t *tree);
+ void ndpi_patricia_get_stats(ndpi_patricia_tree_t *tree, struct ndpi_patricia_tree_stats *stats);
+
+ int ndpi_get_patricia_stats(struct ndpi_detection_module_struct *ndpi_struct,
+ ptree_type ptree_type,
+ struct ndpi_patricia_tree_stats *stats);
/* ptree (trie) API - a wrapper on top of Patricia that seamlessly handle IPv4 and IPv6 */
ndpi_ptree_t* ndpi_ptree_create(void);
diff --git a/src/include/ndpi_patricia_typedefs.h b/src/include/ndpi_patricia_typedefs.h
index c9f7fddac..a75ee416f 100644
--- a/src/include/ndpi_patricia_typedefs.h
+++ b/src/include/ndpi_patricia_typedefs.h
@@ -99,10 +99,16 @@ typedef struct _ndpi_patricia_node_t {
union ndpi_patricia_node_value_t value;
} ndpi_patricia_node_t;
+struct ndpi_patricia_tree_stats {
+ u_int64_t n_search;
+ u_int64_t n_found;
+};
+
typedef struct _ndpi_patricia_tree_t {
ndpi_patricia_node_t *head;
u_int16_t maxbits; /* for IP, 32 bit addresses */
int num_active_node; /* for debug purpose */
+ struct ndpi_patricia_tree_stats stats;
} ndpi_patricia_tree_t;
#endif /* _NDPI_PATRICIA_TYPEDEF_H_ */
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index dde19c488..f5fccfd56 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -595,13 +595,55 @@ typedef enum {
NDPI_HTTP_METHOD_RPC_OUT_DATA,
} ndpi_http_method;
+typedef enum {
+ NDPI_PTREE_RISK_MASK = 0,
+ NDPI_PTREE_RISK,
+ NDPI_PTREE_PROTOCOLS,
+
+ NDPI_PTREE_MAX /* Last one! */
+} ptree_type;
+
+typedef enum {
+ NDPI_AUTOMA_HOST = 0,
+ NDPI_AUTOMA_DOMAIN,
+ NDPI_AUTOMA_TLS_CERT,
+ NDPI_AUTOMA_RISK_MASK,
+ NDPI_AUTOMA_COMMON_ALPNS,
+
+ NDPI_AUTOMA_MAX /* Last one! */
+} automa_type;
+
+struct ndpi_automa_stats {
+ u_int64_t n_search;
+ u_int64_t n_found;
+};
+
+typedef enum {
+ NDPI_LRUCACHE_OOKLA = 0,
+ NDPI_LRUCACHE_BITTORRENT,
+ NDPI_LRUCACHE_ZOOM,
+ NDPI_LRUCACHE_STUN,
+ NDPI_LRUCACHE_TLS_CERT,
+ NDPI_LRUCACHE_MINING,
+ NDPI_LRUCACHE_MSTEAMS,
+
+ NDPI_LRUCACHE_MAX /* Last one! */
+} lru_cache_type;
+
struct ndpi_lru_cache_entry {
u_int32_t key; /* Store the whole key to avoid ambiguities */
u_int32_t is_full:1, value:16, pad:15;
};
+struct ndpi_lru_cache_stats {
+ u_int64_t n_insert;
+ u_int64_t n_search;
+ u_int64_t n_found;
+};
+
struct ndpi_lru_cache {
u_int32_t num_entries;
+ struct ndpi_lru_cache_stats stats;
struct ndpi_lru_cache_entry *entries;
};
@@ -1013,6 +1055,7 @@ typedef struct ndpi_default_ports_tree_node {
typedef struct _ndpi_automa {
void *ac_automa; /* Real type is AC_AUTOMATA_t */
+ struct ndpi_automa_stats stats;
} ndpi_automa;
typedef struct ndpi_str_hash {
@@ -1111,23 +1154,28 @@ struct ndpi_detection_module_struct {
ndpi_automa host_automa, /* Used for DNS/HTTPS */
risky_domain_automa, tls_cert_subject_automa,
host_risk_mask_automa, common_alpns_automa;
+ /* IMPORTANT: please, whenever you add a new automa:
+ * update ndpi_finalize_initialization()
+ * update automa_type above
+ */
+
ndpi_str_hash *malicious_ja3_hashmap, *malicious_sha1_hashmap;
- /* IMPORTANT: please update ndpi_finalize_initialization() whenever you add a new automa */
ndpi_list *trusted_issuer_dn;
void *ip_risk_mask_ptree;
void *ip_risk_ptree;
+ /* IP-based protocol detection */
+ void *protocols_ptree;
+ /* *** If you add a new Patricia tree, please update ptree_type above! *** */
+
struct {
ndpi_automa hostnames, hostnames_shadow;
void *ipAddresses, *ipAddresses_shadow; /* Patricia */
u_int8_t categories_loaded;
} custom_categories;
- /* IP-based protocol detection */
- void *protocols_ptree;
-
u_int8_t ip_version_limit;
/* NDPI_PROTOCOL_OOKLA */
@@ -1154,6 +1202,8 @@ struct ndpi_detection_module_struct {
/* NDPI_PROTOCOL_MSTEAMS */
struct ndpi_lru_cache *msteams_cache;
+ /* *** If you add a new LRU cache, please update lru_cache_type above! *** */
+
ndpi_proto_defaults_t proto_defaults[NDPI_MAX_SUPPORTED_PROTOCOLS+NDPI_MAX_NUM_CUSTOM_PROTOCOLS];
u_int8_t direction_detect_disable:1, /* disable internal detection of packet direction */ _pad:7;
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 875e6e755..8835a0643 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -2039,6 +2039,38 @@ u_int16_t ndpi_patricia_get_maxbits(ndpi_patricia_tree_t *tree) {
/* ******************************************************************** */
+void ndpi_patricia_get_stats(ndpi_patricia_tree_t *tree, struct ndpi_patricia_tree_stats *stats) {
+ if(tree) {
+ stats->n_search = tree->stats.n_search;
+ stats->n_found = tree->stats.n_found;
+ } else {
+ stats->n_search = 0;
+ stats->n_found = 0;
+ }
+}
+
+/* ******************************************************************** */
+
+int ndpi_get_patricia_stats(struct ndpi_detection_module_struct *ndpi_struct,
+ ptree_type ptree_type,
+ struct ndpi_patricia_tree_stats *stats) {
+ switch(ptree_type) {
+ case NDPI_PTREE_RISK_MASK:
+ ndpi_patricia_get_stats((ndpi_patricia_tree_t *)ndpi_struct->ip_risk_mask_ptree, stats);
+ return 0;
+ case NDPI_PTREE_RISK:
+ ndpi_patricia_get_stats((ndpi_patricia_tree_t *)ndpi_struct->ip_risk_ptree, stats);
+ return 0;
+ case NDPI_PTREE_PROTOCOLS:
+ ndpi_patricia_get_stats((ndpi_patricia_tree_t *)ndpi_struct->protocols_ptree, stats);
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+/* ****************************************************** */
+
int ndpi_fill_prefix_v4(ndpi_prefix_t *p, const struct in_addr *a, int b, int mb) {
if(b < 0 || b > mb)
return(-1);
@@ -2841,6 +2873,43 @@ void ndpi_finalize_automa(void *_automa) {
/* ****************************************************** */
+void ndpi_automa_get_stats(void *_automa, struct ndpi_automa_stats *stats) {
+ struct ac_stats ac_stats;
+
+ ac_automata_get_stats((AC_AUTOMATA_t *) _automa, &ac_stats);
+ stats->n_search = ac_stats.n_search;
+ stats->n_found = ac_stats.n_found;
+}
+
+/* ****************************************************** */
+
+int ndpi_get_automa_stats(struct ndpi_detection_module_struct *ndpi_struct,
+ automa_type automa_type,
+ struct ndpi_automa_stats *stats)
+{
+ switch(automa_type) {
+ case NDPI_AUTOMA_HOST:
+ ndpi_automa_get_stats(ndpi_struct->host_automa.ac_automa, stats);
+ return 0;
+ case NDPI_AUTOMA_DOMAIN:
+ ndpi_automa_get_stats(ndpi_struct->risky_domain_automa.ac_automa, stats);
+ return 0;
+ case NDPI_AUTOMA_TLS_CERT:
+ ndpi_automa_get_stats(ndpi_struct->tls_cert_subject_automa.ac_automa, stats);
+ return 0;
+ case NDPI_AUTOMA_RISK_MASK:
+ ndpi_automa_get_stats(ndpi_struct->host_risk_mask_automa.ac_automa, stats);
+ return 0;
+ case NDPI_AUTOMA_COMMON_ALPNS:
+ ndpi_automa_get_stats(ndpi_struct->common_alpns_automa.ac_automa, stats);
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+/* ****************************************************** */
+
static int ndpi_match_string_common(AC_AUTOMATA_t *automa, char *string_to_match,size_t string_len,
u_int32_t *protocol_id, ndpi_protocol_category_t *category,
ndpi_protocol_breed_t *breed) {
@@ -8053,7 +8122,7 @@ void ndpi_set_log_level(struct ndpi_detection_module_struct *ndpi_str, u_int l){
/* LRU cache */
struct ndpi_lru_cache *ndpi_lru_cache_init(u_int32_t num_entries) {
- struct ndpi_lru_cache *c = (struct ndpi_lru_cache *) ndpi_malloc(sizeof(struct ndpi_lru_cache));
+ struct ndpi_lru_cache *c = (struct ndpi_lru_cache *) ndpi_calloc(1, sizeof(struct ndpi_lru_cache));
if(!c)
return(NULL);
@@ -8078,10 +8147,12 @@ u_int8_t ndpi_lru_find_cache(struct ndpi_lru_cache *c, u_int32_t key,
u_int16_t *value, u_int8_t clean_key_when_found) {
u_int32_t slot = key % c->num_entries;
+ c->stats.n_search++;
if(c->entries[slot].is_full && c->entries[slot].key == key) {
*value = c->entries[slot].value;
if(clean_key_when_found)
c->entries[slot].is_full = 0;
+ c->stats.n_found++;
return(1);
} else
return(0);
@@ -8090,9 +8161,53 @@ u_int8_t ndpi_lru_find_cache(struct ndpi_lru_cache *c, u_int32_t key,
void ndpi_lru_add_to_cache(struct ndpi_lru_cache *c, u_int32_t key, u_int16_t value) {
u_int32_t slot = key % c->num_entries;
+ c->stats.n_insert++;
c->entries[slot].is_full = 1, c->entries[slot].key = key, c->entries[slot].value = value;
}
+void ndpi_lru_get_stats(struct ndpi_lru_cache *c, struct ndpi_lru_cache_stats *stats) {
+ if(c) {
+ stats->n_insert = c->stats.n_insert;
+ stats->n_search = c->stats.n_search;
+ stats->n_found = c->stats.n_found;
+ } else {
+ stats->n_insert = 0;
+ stats->n_search = 0;
+ stats->n_found = 0;
+ }
+}
+
+int ndpi_get_lru_cache_stats(struct ndpi_detection_module_struct *ndpi_struct,
+ lru_cache_type cache_type,
+ struct ndpi_lru_cache_stats *stats)
+{
+ switch(cache_type) {
+ case NDPI_LRUCACHE_OOKLA:
+ ndpi_lru_get_stats(ndpi_struct->ookla_cache, stats);
+ return 0;
+ case NDPI_LRUCACHE_BITTORRENT:
+ ndpi_lru_get_stats(ndpi_struct->bittorrent_cache, stats);
+ return 0;
+ case NDPI_LRUCACHE_ZOOM:
+ ndpi_lru_get_stats(ndpi_struct->zoom_cache, stats);
+ return 0;
+ case NDPI_LRUCACHE_STUN:
+ ndpi_lru_get_stats(ndpi_struct->stun_cache, stats);
+ return 0;
+ case NDPI_LRUCACHE_TLS_CERT:
+ ndpi_lru_get_stats(ndpi_struct->tls_cert_cache, stats);
+ return 0;
+ case NDPI_LRUCACHE_MINING:
+ ndpi_lru_get_stats(ndpi_struct->mining_cache, stats);
+ return 0;
+ case NDPI_LRUCACHE_MSTEAMS:
+ ndpi_lru_get_stats(ndpi_struct->msteams_cache, stats);
+ return 0;
+ default:
+ return -1;
+ }
+}
+
/* ******************************************************************** */
/*
diff --git a/src/lib/third_party/include/ahocorasick.h b/src/lib/third_party/include/ahocorasick.h
index e59b71ccf..3eb8fdcf7 100644
--- a/src/lib/third_party/include/ahocorasick.h
+++ b/src/lib/third_party/include/ahocorasick.h
@@ -212,6 +212,11 @@ struct ac_path {
unsigned short int idx,l;
};
+struct ac_stats {
+ uint64_t n_search;
+ uint64_t n_found;
+};
+
typedef struct
{
/* The root of the Aho-Corasick trie */
@@ -236,6 +241,7 @@ typedef struct
int add_to_range; /* for convert to range */
int n_oc,n_range,n_find; /* statistics */
char name[32]; /* if debug != 0 */
+ struct ac_stats stats;
} AC_AUTOMATA_t;
typedef AC_ERROR_t (*NODE_CALLBACK_f)(AC_AUTOMATA_t *, AC_NODE_t *,int idx, void *);
@@ -266,4 +272,5 @@ void ac_automata_enable_debug (int debug);
/* See man open_memstream() for get result as string */
void ac_automata_dump (AC_AUTOMATA_t * thiz, FILE *);
#endif
+void ac_automata_get_stats(AC_AUTOMATA_t * thiz, struct ac_stats *stats);
#endif
diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c
index 8b0d7ca0b..6f542ed77 100644
--- a/src/lib/third_party/src/ahocorasick.c
+++ b/src/lib/third_party/src/ahocorasick.c
@@ -434,6 +434,8 @@ int ac_automata_search (AC_AUTOMATA_t * thiz,
AC_NODE_t *next;
AC_ALPHABET_t *apos;
+ thiz->stats.n_search++;
+
if(thiz->automata_open)
/* you must call ac_automata_locate_failure() first */
return -1;
@@ -493,15 +495,20 @@ int ac_automata_search (AC_AUTOMATA_t * thiz,
match->position = position;
match->match_num = curr->matched_patterns->num;
match->patterns = curr->matched_patterns->patterns;
- if (thiz->match_handler(match, txt, param))
+ if (thiz->match_handler(match, txt, param)) {
+ thiz->stats.n_found++;
return 1;
+ }
}
} /* match->match_map */
}
}
}
- if(thiz->match_handler)
+ if(thiz->match_handler) {
+ if(match->match_counter > 0)
+ thiz->stats.n_found++;
return match->match_counter > 0 ? 1:0;
+ }
for(i = 0; i < 4; i++)
if(txt->match.matched[i]) {
@@ -516,6 +523,7 @@ int ac_automata_search (AC_AUTOMATA_t * thiz,
pattern->rep.number);
}
#endif
+ thiz->stats.n_found++;
return 1;
}
return 0;
@@ -1238,5 +1246,16 @@ static inline void node_sort_edges (AC_NODE_t * thiz)
}
}
+void ac_automata_get_stats(AC_AUTOMATA_t * thiz, struct ac_stats *stats)
+{
+ if (thiz) {
+ stats->n_search = thiz->stats.n_search;
+ stats->n_found = thiz->stats.n_found;
+ } else {
+ stats->n_search = 0;
+ stats->n_found = 0;
+ }
+}
+
/* vim: set ts=4 sw=4 et : */
diff --git a/src/lib/third_party/src/ndpi_patricia.c b/src/lib/third_party/src/ndpi_patricia.c
index 01138d681..3da6836a5 100644
--- a/src/lib/third_party/src/ndpi_patricia.c
+++ b/src/lib/third_party/src/ndpi_patricia.c
@@ -462,6 +462,8 @@ ndpi_patricia_search_exact (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi
assert (prefix);
assert (prefix->bitlen <= patricia->maxbits);
+ patricia->stats.n_search++;
+
if(patricia->head == NULL)
return (NULL);
@@ -517,6 +519,7 @@ ndpi_patricia_search_exact (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi
fprintf (stderr, "patricia_search_exact: found %s/%d\n",
ndpi_prefix_toa (node->prefix), node->prefix->bitlen);
#endif /* PATRICIA_DEBUG */
+ patricia->stats.n_found++;
return (node);
}
return (NULL);
@@ -537,6 +540,8 @@ ndpi_patricia_search_best2 (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi
assert (prefix);
assert (prefix->bitlen <= patricia->maxbits);
+ patricia->stats.n_search++;
+
if(patricia->head == NULL)
return (NULL);
@@ -614,6 +619,7 @@ ndpi_patricia_search_best2 (ndpi_patricia_tree_t *patricia, ndpi_prefix_t *prefi
fprintf (stderr, "patricia_search_best: found %s/%d\n",
ndpi_prefix_toa (node->prefix), node->prefix->bitlen);
#endif /* PATRICIA_DEBUG */
+ patricia->stats.n_found++;
return (node);
}
}