diff options
author | Vitaly Lavrov <vel21ripn@gmail.com> | 2021-06-15 09:28:09 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-15 11:28:09 +0200 |
commit | be808c30f3f4582009df4c5efccd4f3bb0c6ef1d (patch) | |
tree | f16d861560fecd97d23e5de1826d8fdc9bcd0049 | |
parent | 09293fabd9dcb92812641788509a1a766e9320f8 (diff) |
Code review. (#1205)
The common steps required to call the ac_automata_search() function
have been moved into the new ndpi_match_string_common() function. This made it
possible to simplify the ndpi_match_string, ndpi_match_string_protocol_id,
ndpi_match_string_value, ndpi_match_custom_category, ndpi_match_string_subprotocol,
ndpi_match_bigram, and ndpi_match_trigram functions.
The u_int16_t type is now used for protocol identifiers when working with the
ahocorasick library (changes in src/include/ndpi_api.h.in and src/include/ndpi_typedefs.h).
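Reworked the "finalization" of all AC_AUTOMATA_t structures: a single ac_automa_finalized flag in ndpi_detection_module_struct replaces the per-automa flag in ndpi_automa.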
Changing the order of fields in the ndpi_call_function_struct structure
reduces the size of the ndpi_detection_module_struct structure by 10 kB (for x86_64).
-rw-r--r-- | src/include/ndpi_api.h.in | 2 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 8 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 195 | ||||
-rw-r--r-- | src/lib/protocols/tls.c | 4 | ||||
-rw-r--r-- | src/lib/third_party/src/ahocorasick.c | 19 |
5 files changed, 73 insertions, 155 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in index f2da3b186..d2c1b62bf 100644 --- a/src/include/ndpi_api.h.in +++ b/src/include/ndpi_api.h.in @@ -106,7 +106,7 @@ extern "C" { via ndpi_add_string_value_to_automa() */ int ndpi_match_string_value(void *_automa, char *string_to_match, - u_int match_len, u_int32_t *num); + u_int match_len, u_int16_t *num); /** * nDPI personal allocation and free functions diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 4656b4af2..4fee4e4d4 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -923,11 +923,11 @@ struct ndpi_detection_module_struct; struct ndpi_flow_struct; struct ndpi_call_function_struct { - u_int16_t ndpi_protocol_id; NDPI_PROTOCOL_BITMASK detection_bitmask; NDPI_PROTOCOL_BITMASK excluded_protocol_bitmask; - NDPI_SELECTION_BITMASK_PROTOCOL_SIZE ndpi_selection_bitmask; void (*func) (struct ndpi_detection_module_struct *, struct ndpi_flow_struct *flow); + NDPI_SELECTION_BITMASK_PROTOCOL_SIZE ndpi_selection_bitmask; + u_int16_t ndpi_protocol_id; u_int8_t detection_feature; }; @@ -1056,7 +1056,6 @@ typedef struct ndpi_default_ports_tree_node { typedef struct _ndpi_automa { void *ac_automa; /* Real type is AC_AUTOMATA_t */ - u_int8_t ac_automa_finalized; } ndpi_automa; typedef struct ndpi_proto { @@ -1149,6 +1148,7 @@ struct ndpi_detection_module_struct { u_int ndpi_num_supported_protocols; u_int ndpi_num_custom_protocols; + int ac_automa_finalized; /* HTTP/DNS/HTTPS/QUIC host matching */ ndpi_automa host_automa, /* Used for DNS/HTTPS */ content_automa, /* Used for HTTP subprotocol_detection */ @@ -1526,7 +1526,7 @@ typedef enum } ndpi_prefs; typedef struct { - int protocol_id; + u_int16_t protocol_id; ndpi_protocol_category_t protocol_category; ndpi_protocol_breed_t protocol_breed; } ndpi_protocol_match_result; diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 25b9259ea..b7da69d3f 100644 --- a/src/lib/ndpi_main.c +++ 
b/src/lib/ndpi_main.c @@ -569,7 +569,7 @@ static int ndpi_string_to_automa(struct ndpi_detection_module_struct *ndpi_str, return(-1); } - if((automa->ac_automa == NULL) || (value == NULL)) + if((automa->ac_automa == NULL) || (value == NULL) || !*value) return(-2); len = strlen(value); @@ -2348,6 +2348,8 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs void ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str) { u_int i; + if(ndpi_str->ac_automa_finalized) return; + for(i = 0; i < 99; i++) { ndpi_automa *automa; @@ -2385,13 +2387,12 @@ void ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str) break; default: + ndpi_str->ac_automa_finalized = 1; return; } - if(automa && automa->ac_automa) { - ac_automata_finalize((AC_AUTOMATA_t *) automa->ac_automa); - automa->ac_automa_finalized = 1; - } + if(automa && automa->ac_automa) + ac_automata_finalize((AC_AUTOMATA_t *) automa->ac_automa); } } @@ -2441,16 +2442,25 @@ void ndpi_finalize_automa(void *_automa) { /* ****************************************************** */ -int ndpi_match_string(void *_automa, char *string_to_match) { +static int ndpi_match_string_common(AC_AUTOMATA_t *automa, char *string_to_match,size_t string_len, + u_int16_t *protocol_id, ndpi_protocol_category_t *category, + ndpi_protocol_breed_t *breed) { AC_REP_t match = { NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED }; AC_TEXT_t ac_input_text; - AC_AUTOMATA_t *automa = (AC_AUTOMATA_t *) _automa; int rc; - if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0')) + if(protocol_id) *protocol_id = NDPI_PROTOCOL_UNKNOWN; + + if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0')) { return(-2); + } - ac_input_text.astring = string_to_match, ac_input_text.length = strlen(string_to_match); + if(automa->automata_open) { + printf("[%s:%d] [NDPI] Internal error: please call 
ndpi_finalize_initialization()\n", __FILE__, __LINE__); + return(-1); + } + + ac_input_text.astring = string_to_match, ac_input_text.length = string_len; ac_input_text.ignore_case = 0; rc = ac_automata_search(automa, &ac_input_text, &match); @@ -2462,76 +2472,48 @@ int ndpi_match_string(void *_automa, char *string_to_match) { if((rc == 0) && (match.number != 0)) rc = 1; - return(rc ? match.number : 0); + if(protocol_id) + *protocol_id = rc ? match.number:NDPI_PROTOCOL_UNKNOWN; + if(category) + *category = rc ? match.category:0; + if(breed) + *breed = rc ? match.breed:0; + return rc; +} + +int ndpi_match_string(void *_automa, char *string_to_match) { + uint16_t proto_id; + int rc; + if(!string_to_match) + return(-2); + rc = ndpi_match_string_common(_automa,string_to_match,strlen(string_to_match), + &proto_id, NULL, NULL); + if(rc < 0) return rc; + return rc ? proto_id : NDPI_PROTOCOL_UNKNOWN; } /* ****************************************************** */ -int ndpi_match_string_protocol_id(void *_automa, char *string_to_match, +int ndpi_match_string_protocol_id(void *automa, char *string_to_match, u_int match_len, u_int16_t *protocol_id, ndpi_protocol_category_t *category, ndpi_protocol_breed_t *breed) { - AC_TEXT_t ac_input_text; - AC_AUTOMATA_t *automa = (AC_AUTOMATA_t *) _automa; - AC_REP_t match = { 0, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED }; - int rc; - - *protocol_id = (u_int16_t)-1; - if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0')) - return(-2); - - ac_input_text.astring = string_to_match, ac_input_text.length = match_len; - ac_input_text.ignore_case = 0; - rc = ac_automata_search(automa, &ac_input_text, &match); - /* - As ac_automata_search can detect partial matches and continue the search process - in case rc == 0 (i.e. 
no match), we need to check if there is a partial match - and in this case return it - */ - if((rc == 0) && (match.number != 0)) - rc = 1; - - if(rc) - *protocol_id = (u_int16_t)match.number, *category = match.category, - *breed = match.breed; - else - *protocol_id = NDPI_PROTOCOL_UNKNOWN; - - return((*protocol_id != NDPI_PROTOCOL_UNKNOWN) ? 0 : -1); + int rc = ndpi_match_string_common((AC_AUTOMATA_t*)automa, string_to_match, + match_len, protocol_id, category, breed); + if(rc < 0) return rc; + return(*protocol_id != NDPI_PROTOCOL_UNKNOWN ? 0 : -1); } /* ****************************************************** */ -int ndpi_match_string_value(void *_automa, char *string_to_match, - u_int match_len, u_int32_t *num) { - AC_TEXT_t ac_input_text; - AC_AUTOMATA_t *automa = (AC_AUTOMATA_t *) _automa; - AC_REP_t match = { 0, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED }; - int rc; - - *num = (u_int32_t)-1; - if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0')) - return(-2); - - ac_input_text.astring = string_to_match, ac_input_text.length = match_len; - ac_input_text.ignore_case = 0; - rc = ac_automata_search(automa, &ac_input_text, &match); - - /* - As ac_automata_search can detect partial matches and continue the search process - in case rc == 0 (i.e. no match), we need to check if there is a partial match - and in this case return it - */ - if((rc == 0) && (match.number != 0)) - rc = 1; - - if(rc) - *num = match.number; - else - *num = 0; +int ndpi_match_string_value(void *automa, char *string_to_match, + u_int match_len, u_int16_t *num) { - return(rc ? 0 : -1); + int rc = ndpi_match_string_common((AC_AUTOMATA_t *)automa, string_to_match, + match_len, num, NULL, NULL); + if(rc < 0) return rc; + return rc ? 
0 : -1; } /* *********************************************** */ @@ -2539,12 +2521,11 @@ int ndpi_match_string_value(void *_automa, char *string_to_match, int ndpi_match_custom_category(struct ndpi_detection_module_struct *ndpi_str, char *name, u_int name_len, ndpi_protocol_category_t *category) { - ndpi_protocol_breed_t breed; u_int16_t id; - int rc = ndpi_match_string_protocol_id(ndpi_str->custom_categories.hostnames.ac_automa, - name, name_len, &id, category, &breed); - - return(rc); + int rc = ndpi_match_string_common(ndpi_str->custom_categories.hostnames.ac_automa, + name, name_len, &id, category, NULL); + if(rc < 0) return rc; + return(id != NDPI_PROTOCOL_UNKNOWN ? 0 : -1); } /* *********************************************** */ @@ -6637,36 +6618,16 @@ int ndpi_match_prefix(const u_int8_t *payload, int ndpi_match_string_subprotocol(struct ndpi_detection_module_struct *ndpi_str, char *string_to_match, u_int string_to_match_len, ndpi_protocol_match_result *ret_match, u_int8_t is_host_match) { - AC_TEXT_t ac_input_text; ndpi_automa *automa = is_host_match ? &ndpi_str->host_automa : &ndpi_str->content_automa; - AC_REP_t match = {NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED}; int rc; if((automa->ac_automa == NULL) || (string_to_match_len == 0)) return(NDPI_PROTOCOL_UNKNOWN); - if(!automa->ac_automa_finalized) { - printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initialization()\n", __FILE__, __LINE__); - return(0); /* No matches */ - } - - ac_input_text.astring = string_to_match, ac_input_text.length = string_to_match_len; - ac_input_text.ignore_case = 0; - rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match); - - /* - As ac_automata_search can detect partial matches and continue the search process - in case rc == 0 (i.e. 
no match), we need to check if there is a partial match - and in this case return it - */ - if((rc == 0) && (match.number != 0)) - rc = 1; - - /* We need to take into account also rc == 0 that is used for partial matches */ - ret_match->protocol_id = match.number, ret_match->protocol_category = match.category, - ret_match->protocol_breed = match.breed; - - return(rc ? match.number : 0); + rc = ndpi_match_string_common(((AC_AUTOMATA_t *) automa->ac_automa), + string_to_match,string_to_match_len, &ret_match->protocol_id, + &ret_match->protocol_category, &ret_match->protocol_breed); + return rc < 0 ? rc : ret_match->protocol_id; } /* **************************************** */ @@ -6693,7 +6654,7 @@ static u_int16_t ndpi_automa_match_string_subprotocol(struct ndpi_detection_modu struct ndpi_flow_struct *flow, char *string_to_match, u_int string_to_match_len, u_int16_t master_protocol_id, ndpi_protocol_match_result *ret_match, u_int8_t is_host_match) { - int matching_protocol_id; + uint16_t matching_protocol_id; struct ndpi_packet_struct *packet = &flow->packet; matching_protocol_id = @@ -6825,14 +6786,11 @@ u_int16_t ndpi_match_content_subprotocol(struct ndpi_detection_module_struct *nd int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str, ndpi_automa *automa, char *bigram_to_match) { - AC_TEXT_t ac_input_text; - AC_REP_t match = {NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED}; - int rc; if((automa->ac_automa == NULL) || (bigram_to_match == NULL)) return(-1); - if(!automa->ac_automa_finalized) { + if(!ndpi_str->ac_automa_finalized) { #if 1 ndpi_finalize_initialization(ndpi_str); #else @@ -6840,34 +6798,19 @@ int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str, return(0); /* No matches */ #endif } - - ac_input_text.astring = bigram_to_match, ac_input_text.length = 2; - ac_input_text.ignore_case = 0; - rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match); - - /* - As 
ac_automata_search can detect partial matches and continue the search process - in case rc == 0 (i.e. no match), we need to check if there is a partial match - and in this case return it - */ - if((rc == 0) && (match.number != 0)) - rc = 1; - - return(rc ? match.number : 0); + return ndpi_match_string_common(automa->ac_automa,bigram_to_match,2, NULL, NULL, NULL); } /* ****************************************************** */ int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str, ndpi_automa *automa, char *trigram_to_match) { - AC_TEXT_t ac_input_text; - AC_REP_t match = {NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED}; int rc; if((automa->ac_automa == NULL) || (trigram_to_match == NULL)) return(-1); - if(!automa->ac_automa_finalized) { + if(!ndpi_str->ac_automa_finalized) { #if 1 ndpi_finalize_initialization(ndpi_str); #else @@ -6875,20 +6818,8 @@ int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str, return(0); /* No matches */ #endif } - - ac_input_text.astring = trigram_to_match, ac_input_text.length = 3; - ac_input_text.ignore_case = 0; - rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match); - - /* - As ac_automata_search can detect partial matches and continue the search process - in case rc == 0 (i.e. no match), we need to check if there is a partial match - and in this case return it - */ - if((rc == 0) && (match.number != 0)) - rc = 1; - - if(ndpi_verbose_dga_detection && rc && match.number) { + rc = ndpi_match_string_common(automa->ac_automa,trigram_to_match,3, NULL, NULL, NULL); + if(ndpi_verbose_dga_detection && rc) { printf("[%s:%d] [NDPI] Trigram %c%c%c\n", __FILE__, __LINE__, trigram_to_match[0], @@ -6896,7 +6827,7 @@ int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str, trigram_to_match[2]); } - return(rc ? 
match.number : 0); + return(rc); } /* ****************************************************** */ diff --git a/src/lib/protocols/tls.c b/src/lib/protocols/tls.c index 4b54b47a0..5f15d7f14 100644 --- a/src/lib/protocols/tls.c +++ b/src/lib/protocols/tls.c @@ -591,9 +591,9 @@ static void processCertificateElements(struct ndpi_detection_module_struct *ndpi if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) { /* No idea what is happening behind the scenes: let's check the certificate */ - u_int32_t proto_id; + u_int16_t proto_id; int rc = ndpi_match_string_value(ndpi_struct->tls_cert_subject_automa.ac_automa, - rdnSeqBuf, strlen(rdnSeqBuf),&proto_id); + rdnSeqBuf, strlen(rdnSeqBuf), &proto_id); if(rc == 0) { /* Match found */ diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c index 745afa30e..ab9c5d333 100644 --- a/src/lib/third_party/src/ahocorasick.c +++ b/src/lib/third_party/src/ahocorasick.c @@ -413,7 +413,6 @@ int ac_automata_exact_match(AC_PATTERNS_t *mp,int pos, AC_TEXT_t *txt) { int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * txt, AC_REP_t * param) { - uint8_t alpha; unsigned long position; int icase = 0,i; AC_MATCH_t *match; @@ -438,7 +437,7 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, /* This is the main search loop. * it must be keep as lightweight as possible. 
*/ while (position < txt->length) { - alpha = (uint8_t)apos[position]; + uint8_t alpha = (uint8_t)apos[position]; if(thiz->to_lc) alpha = aho_lc[alpha]; if(!(next = node_findbs_next_ac(curr, (uint8_t)alpha, icase))) { if(curr->failure_node) /* we are not in the root node */ @@ -918,7 +917,7 @@ static AC_NODE_t * node_create_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha) return next; } -static inline int mp_data_size(int n) { +static inline size_t mp_data_size(int n) { return sizeof(AC_PATTERNS_t) + n*sizeof(AC_PATTERN_t); } @@ -1104,14 +1103,12 @@ static int node_range_edges (AC_AUTOMATA_t *thiz, AC_NODE_t * node) return 1; } -// if(e->degree < __SIZEOF_LONG__) return 0; - i = (high - low)/8; if (i < thiz->add_to_range) i = thiz->add_to_range; i += REALLOC_CHUNK_OUTGOING-1; i -= i % REALLOC_CHUNK_OUTGOING; - if(high - low + 1 < e->max + i) { + if(high - low + 1 < e->max + i || (node->root && !thiz->no_root_range)) { int added = (high - low + 1) - e->max; struct edge *new_o = node_resize_outgoing(node->outgoing,added); if(new_o) { @@ -1122,16 +1119,6 @@ static int node_range_edges (AC_AUTOMATA_t *thiz, AC_NODE_t * node) return 0; } - if(node->root && !thiz->no_root_range) { - struct edge *new_o; - int added = (high - low + 1) - e->max; - new_o = node_resize_outgoing(node->outgoing,added); - if(new_o) { - node->outgoing = new_o; - acho_2range(node,low,high); - return 1; - } - } return 0; } /****************************************************************************** |