about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Vitaly Lavrov <vel21ripn@gmail.com> 2021-06-15 09:28:09 +0000
committer GitHub <noreply@github.com> 2021-06-15 11:28:09 +0200
commit be808c30f3f4582009df4c5efccd4f3bb0c6ef1d (patch)
tree f16d861560fecd97d23e5de1826d8fdc9bcd0049
parent 09293fabd9dcb92812641788509a1a766e9320f8 (diff)
Code review. (#1205)
The common steps required to call ac_automata_search() have been moved into the new ndpi_match_string_common() function. This made it possible to simplify the ndpi_match_string(), ndpi_match_string_protocol_id(), ndpi_match_string_value(), ndpi_match_custom_category(), ndpi_match_string_subprotocol(), ndpi_match_bigram() and ndpi_match_trigram() functions. The u_int16_t type is now used for protocol identifiers when working with the ahocorasick library (changes in src/include/ndpi_api.h.in and src/include/ndpi_typedefs.h). The "finalization" of all AC_AUTOMATA_t structures has been reworked. Reordering the fields of the ndpi_call_function_struct structure reduces the size of the ndpi_detection_module_struct structure by 10 kB (on x86_64).
-rw-r--r-- src/include/ndpi_api.h.in 2
-rw-r--r-- src/include/ndpi_typedefs.h 8
-rw-r--r-- src/lib/ndpi_main.c 195
-rw-r--r-- src/lib/protocols/tls.c 4
-rw-r--r-- src/lib/third_party/src/ahocorasick.c 19
5 files changed, 73 insertions, 155 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in
index f2da3b186..d2c1b62bf 100644
--- a/src/include/ndpi_api.h.in
+++ b/src/include/ndpi_api.h.in
@@ -106,7 +106,7 @@ extern "C" {
via ndpi_add_string_value_to_automa()
*/
int ndpi_match_string_value(void *_automa, char *string_to_match,
- u_int match_len, u_int32_t *num);
+ u_int match_len, u_int16_t *num);
/**
* nDPI personal allocation and free functions
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 4656b4af2..4fee4e4d4 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -923,11 +923,11 @@ struct ndpi_detection_module_struct;
struct ndpi_flow_struct;
struct ndpi_call_function_struct {
- u_int16_t ndpi_protocol_id;
NDPI_PROTOCOL_BITMASK detection_bitmask;
NDPI_PROTOCOL_BITMASK excluded_protocol_bitmask;
- NDPI_SELECTION_BITMASK_PROTOCOL_SIZE ndpi_selection_bitmask;
void (*func) (struct ndpi_detection_module_struct *, struct ndpi_flow_struct *flow);
+ NDPI_SELECTION_BITMASK_PROTOCOL_SIZE ndpi_selection_bitmask;
+ u_int16_t ndpi_protocol_id;
u_int8_t detection_feature;
};
@@ -1056,7 +1056,6 @@ typedef struct ndpi_default_ports_tree_node {
typedef struct _ndpi_automa {
void *ac_automa; /* Real type is AC_AUTOMATA_t */
- u_int8_t ac_automa_finalized;
} ndpi_automa;
typedef struct ndpi_proto {
@@ -1149,6 +1148,7 @@ struct ndpi_detection_module_struct {
u_int ndpi_num_supported_protocols;
u_int ndpi_num_custom_protocols;
+ int ac_automa_finalized;
/* HTTP/DNS/HTTPS/QUIC host matching */
ndpi_automa host_automa, /* Used for DNS/HTTPS */
content_automa, /* Used for HTTP subprotocol_detection */
@@ -1526,7 +1526,7 @@ typedef enum
} ndpi_prefs;
typedef struct {
- int protocol_id;
+ u_int16_t protocol_id;
ndpi_protocol_category_t protocol_category;
ndpi_protocol_breed_t protocol_breed;
} ndpi_protocol_match_result;
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 25b9259ea..b7da69d3f 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -569,7 +569,7 @@ static int ndpi_string_to_automa(struct ndpi_detection_module_struct *ndpi_str,
return(-1);
}
- if((automa->ac_automa == NULL) || (value == NULL))
+ if((automa->ac_automa == NULL) || (value == NULL) || !*value)
return(-2);
len = strlen(value);
@@ -2348,6 +2348,8 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs
void ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str) {
u_int i;
+ if(ndpi_str->ac_automa_finalized) return;
+
for(i = 0; i < 99; i++) {
ndpi_automa *automa;
@@ -2385,13 +2387,12 @@ void ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str)
break;
default:
+ ndpi_str->ac_automa_finalized = 1;
return;
}
- if(automa && automa->ac_automa) {
- ac_automata_finalize((AC_AUTOMATA_t *) automa->ac_automa);
- automa->ac_automa_finalized = 1;
- }
+ if(automa && automa->ac_automa)
+ ac_automata_finalize((AC_AUTOMATA_t *) automa->ac_automa);
}
}
@@ -2441,16 +2442,25 @@ void ndpi_finalize_automa(void *_automa) {
/* ****************************************************** */
-int ndpi_match_string(void *_automa, char *string_to_match) {
+static int ndpi_match_string_common(AC_AUTOMATA_t *automa, char *string_to_match,size_t string_len,
+ u_int16_t *protocol_id, ndpi_protocol_category_t *category,
+ ndpi_protocol_breed_t *breed) {
AC_REP_t match = { NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED };
AC_TEXT_t ac_input_text;
- AC_AUTOMATA_t *automa = (AC_AUTOMATA_t *) _automa;
int rc;
- if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0'))
+ if(protocol_id) *protocol_id = NDPI_PROTOCOL_UNKNOWN;
+
+ if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0')) {
return(-2);
+ }
- ac_input_text.astring = string_to_match, ac_input_text.length = strlen(string_to_match);
+ if(automa->automata_open) {
+ printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initialization()\n", __FILE__, __LINE__);
+ return(-1);
+ }
+
+ ac_input_text.astring = string_to_match, ac_input_text.length = string_len;
ac_input_text.ignore_case = 0;
rc = ac_automata_search(automa, &ac_input_text, &match);
@@ -2462,76 +2472,48 @@ int ndpi_match_string(void *_automa, char *string_to_match) {
if((rc == 0) && (match.number != 0))
rc = 1;
- return(rc ? match.number : 0);
+ if(protocol_id)
+ *protocol_id = rc ? match.number:NDPI_PROTOCOL_UNKNOWN;
+ if(category)
+ *category = rc ? match.category:0;
+ if(breed)
+ *breed = rc ? match.breed:0;
+ return rc;
+}
+
+int ndpi_match_string(void *_automa, char *string_to_match) {
+ uint16_t proto_id;
+ int rc;
+ if(!string_to_match)
+ return(-2);
+ rc = ndpi_match_string_common(_automa,string_to_match,strlen(string_to_match),
+ &proto_id, NULL, NULL);
+ if(rc < 0) return rc;
+ return rc ? proto_id : NDPI_PROTOCOL_UNKNOWN;
}
/* ****************************************************** */
-int ndpi_match_string_protocol_id(void *_automa, char *string_to_match,
+int ndpi_match_string_protocol_id(void *automa, char *string_to_match,
u_int match_len, u_int16_t *protocol_id,
ndpi_protocol_category_t *category,
ndpi_protocol_breed_t *breed) {
- AC_TEXT_t ac_input_text;
- AC_AUTOMATA_t *automa = (AC_AUTOMATA_t *) _automa;
- AC_REP_t match = { 0, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED };
- int rc;
-
- *protocol_id = (u_int16_t)-1;
- if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0'))
- return(-2);
-
- ac_input_text.astring = string_to_match, ac_input_text.length = match_len;
- ac_input_text.ignore_case = 0;
- rc = ac_automata_search(automa, &ac_input_text, &match);
- /*
- As ac_automata_search can detect partial matches and continue the search process
- in case rc == 0 (i.e. no match), we need to check if there is a partial match
- and in this case return it
- */
- if((rc == 0) && (match.number != 0))
- rc = 1;
-
- if(rc)
- *protocol_id = (u_int16_t)match.number, *category = match.category,
- *breed = match.breed;
- else
- *protocol_id = NDPI_PROTOCOL_UNKNOWN;
-
- return((*protocol_id != NDPI_PROTOCOL_UNKNOWN) ? 0 : -1);
+ int rc = ndpi_match_string_common((AC_AUTOMATA_t*)automa, string_to_match,
+ match_len, protocol_id, category, breed);
+ if(rc < 0) return rc;
+ return(*protocol_id != NDPI_PROTOCOL_UNKNOWN ? 0 : -1);
}
/* ****************************************************** */
-int ndpi_match_string_value(void *_automa, char *string_to_match,
- u_int match_len, u_int32_t *num) {
- AC_TEXT_t ac_input_text;
- AC_AUTOMATA_t *automa = (AC_AUTOMATA_t *) _automa;
- AC_REP_t match = { 0, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED };
- int rc;
-
- *num = (u_int32_t)-1;
- if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0'))
- return(-2);
-
- ac_input_text.astring = string_to_match, ac_input_text.length = match_len;
- ac_input_text.ignore_case = 0;
- rc = ac_automata_search(automa, &ac_input_text, &match);
-
- /*
- As ac_automata_search can detect partial matches and continue the search process
- in case rc == 0 (i.e. no match), we need to check if there is a partial match
- and in this case return it
- */
- if((rc == 0) && (match.number != 0))
- rc = 1;
-
- if(rc)
- *num = match.number;
- else
- *num = 0;
+int ndpi_match_string_value(void *automa, char *string_to_match,
+ u_int match_len, u_int16_t *num) {
- return(rc ? 0 : -1);
+ int rc = ndpi_match_string_common((AC_AUTOMATA_t *)automa, string_to_match,
+ match_len, num, NULL, NULL);
+ if(rc < 0) return rc;
+ return rc ? 0 : -1;
}
/* *********************************************** */
@@ -2539,12 +2521,11 @@ int ndpi_match_string_value(void *_automa, char *string_to_match,
int ndpi_match_custom_category(struct ndpi_detection_module_struct *ndpi_str,
char *name, u_int name_len,
ndpi_protocol_category_t *category) {
- ndpi_protocol_breed_t breed;
u_int16_t id;
- int rc = ndpi_match_string_protocol_id(ndpi_str->custom_categories.hostnames.ac_automa,
- name, name_len, &id, category, &breed);
-
- return(rc);
+ int rc = ndpi_match_string_common(ndpi_str->custom_categories.hostnames.ac_automa,
+ name, name_len, &id, category, NULL);
+ if(rc < 0) return rc;
+ return(id != NDPI_PROTOCOL_UNKNOWN ? 0 : -1);
}
/* *********************************************** */
@@ -6637,36 +6618,16 @@ int ndpi_match_prefix(const u_int8_t *payload,
int ndpi_match_string_subprotocol(struct ndpi_detection_module_struct *ndpi_str, char *string_to_match,
u_int string_to_match_len, ndpi_protocol_match_result *ret_match,
u_int8_t is_host_match) {
- AC_TEXT_t ac_input_text;
ndpi_automa *automa = is_host_match ? &ndpi_str->host_automa : &ndpi_str->content_automa;
- AC_REP_t match = {NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED};
int rc;
if((automa->ac_automa == NULL) || (string_to_match_len == 0))
return(NDPI_PROTOCOL_UNKNOWN);
- if(!automa->ac_automa_finalized) {
- printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initialization()\n", __FILE__, __LINE__);
- return(0); /* No matches */
- }
-
- ac_input_text.astring = string_to_match, ac_input_text.length = string_to_match_len;
- ac_input_text.ignore_case = 0;
- rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match);
-
- /*
- As ac_automata_search can detect partial matches and continue the search process
- in case rc == 0 (i.e. no match), we need to check if there is a partial match
- and in this case return it
- */
- if((rc == 0) && (match.number != 0))
- rc = 1;
-
- /* We need to take into account also rc == 0 that is used for partial matches */
- ret_match->protocol_id = match.number, ret_match->protocol_category = match.category,
- ret_match->protocol_breed = match.breed;
-
- return(rc ? match.number : 0);
+ rc = ndpi_match_string_common(((AC_AUTOMATA_t *) automa->ac_automa),
+ string_to_match,string_to_match_len, &ret_match->protocol_id,
+ &ret_match->protocol_category, &ret_match->protocol_breed);
+ return rc < 0 ? rc : ret_match->protocol_id;
}
/* **************************************** */
@@ -6693,7 +6654,7 @@ static u_int16_t ndpi_automa_match_string_subprotocol(struct ndpi_detection_modu
struct ndpi_flow_struct *flow, char *string_to_match,
u_int string_to_match_len, u_int16_t master_protocol_id,
ndpi_protocol_match_result *ret_match, u_int8_t is_host_match) {
- int matching_protocol_id;
+ uint16_t matching_protocol_id;
struct ndpi_packet_struct *packet = &flow->packet;
matching_protocol_id =
@@ -6825,14 +6786,11 @@ u_int16_t ndpi_match_content_subprotocol(struct ndpi_detection_module_struct *nd
int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str,
ndpi_automa *automa, char *bigram_to_match) {
- AC_TEXT_t ac_input_text;
- AC_REP_t match = {NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED};
- int rc;
if((automa->ac_automa == NULL) || (bigram_to_match == NULL))
return(-1);
- if(!automa->ac_automa_finalized) {
+ if(!ndpi_str->ac_automa_finalized) {
#if 1
ndpi_finalize_initialization(ndpi_str);
#else
@@ -6840,34 +6798,19 @@ int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str,
return(0); /* No matches */
#endif
}
-
- ac_input_text.astring = bigram_to_match, ac_input_text.length = 2;
- ac_input_text.ignore_case = 0;
- rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match);
-
- /*
- As ac_automata_search can detect partial matches and continue the search process
- in case rc == 0 (i.e. no match), we need to check if there is a partial match
- and in this case return it
- */
- if((rc == 0) && (match.number != 0))
- rc = 1;
-
- return(rc ? match.number : 0);
+ return ndpi_match_string_common(automa->ac_automa,bigram_to_match,2, NULL, NULL, NULL);
}
/* ****************************************************** */
int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str,
ndpi_automa *automa, char *trigram_to_match) {
- AC_TEXT_t ac_input_text;
- AC_REP_t match = {NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED};
int rc;
if((automa->ac_automa == NULL) || (trigram_to_match == NULL))
return(-1);
- if(!automa->ac_automa_finalized) {
+ if(!ndpi_str->ac_automa_finalized) {
#if 1
ndpi_finalize_initialization(ndpi_str);
#else
@@ -6875,20 +6818,8 @@ int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str,
return(0); /* No matches */
#endif
}
-
- ac_input_text.astring = trigram_to_match, ac_input_text.length = 3;
- ac_input_text.ignore_case = 0;
- rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match);
-
- /*
- As ac_automata_search can detect partial matches and continue the search process
- in case rc == 0 (i.e. no match), we need to check if there is a partial match
- and in this case return it
- */
- if((rc == 0) && (match.number != 0))
- rc = 1;
-
- if(ndpi_verbose_dga_detection && rc && match.number) {
+ rc = ndpi_match_string_common(automa->ac_automa,trigram_to_match,3, NULL, NULL, NULL);
+ if(ndpi_verbose_dga_detection && rc) {
printf("[%s:%d] [NDPI] Trigram %c%c%c\n",
__FILE__, __LINE__,
trigram_to_match[0],
@@ -6896,7 +6827,7 @@ int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str,
trigram_to_match[2]);
}
- return(rc ? match.number : 0);
+ return(rc);
}
/* ****************************************************** */
diff --git a/src/lib/protocols/tls.c b/src/lib/protocols/tls.c
index 4b54b47a0..5f15d7f14 100644
--- a/src/lib/protocols/tls.c
+++ b/src/lib/protocols/tls.c
@@ -591,9 +591,9 @@ static void processCertificateElements(struct ndpi_detection_module_struct *ndpi
if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) {
/* No idea what is happening behind the scenes: let's check the certificate */
- u_int32_t proto_id;
+ u_int16_t proto_id;
int rc = ndpi_match_string_value(ndpi_struct->tls_cert_subject_automa.ac_automa,
- rdnSeqBuf, strlen(rdnSeqBuf),&proto_id);
+ rdnSeqBuf, strlen(rdnSeqBuf), &proto_id);
if(rc == 0) {
/* Match found */
diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c
index 745afa30e..ab9c5d333 100644
--- a/src/lib/third_party/src/ahocorasick.c
+++ b/src/lib/third_party/src/ahocorasick.c
@@ -413,7 +413,6 @@ int ac_automata_exact_match(AC_PATTERNS_t *mp,int pos, AC_TEXT_t *txt) {
int ac_automata_search (AC_AUTOMATA_t * thiz,
AC_TEXT_t * txt, AC_REP_t * param)
{
- uint8_t alpha;
unsigned long position;
int icase = 0,i;
AC_MATCH_t *match;
@@ -438,7 +437,7 @@ int ac_automata_search (AC_AUTOMATA_t * thiz,
/* This is the main search loop.
* it must be keep as lightweight as possible. */
while (position < txt->length) {
- alpha = (uint8_t)apos[position];
+ uint8_t alpha = (uint8_t)apos[position];
if(thiz->to_lc) alpha = aho_lc[alpha];
if(!(next = node_findbs_next_ac(curr, (uint8_t)alpha, icase))) {
if(curr->failure_node) /* we are not in the root node */
@@ -918,7 +917,7 @@ static AC_NODE_t * node_create_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha)
return next;
}
-static inline int mp_data_size(int n) {
+static inline size_t mp_data_size(int n) {
return sizeof(AC_PATTERNS_t) + n*sizeof(AC_PATTERN_t);
}
@@ -1104,14 +1103,12 @@ static int node_range_edges (AC_AUTOMATA_t *thiz, AC_NODE_t * node)
return 1;
}
-// if(e->degree < __SIZEOF_LONG__) return 0;
-
i = (high - low)/8;
if (i < thiz->add_to_range) i = thiz->add_to_range;
i += REALLOC_CHUNK_OUTGOING-1;
i -= i % REALLOC_CHUNK_OUTGOING;
- if(high - low + 1 < e->max + i) {
+ if(high - low + 1 < e->max + i || (node->root && !thiz->no_root_range)) {
int added = (high - low + 1) - e->max;
struct edge *new_o = node_resize_outgoing(node->outgoing,added);
if(new_o) {
@@ -1122,16 +1119,6 @@ static int node_range_edges (AC_AUTOMATA_t *thiz, AC_NODE_t * node)
return 0;
}
- if(node->root && !thiz->no_root_range) {
- struct edge *new_o;
- int added = (high - low + 1) - e->max;
- new_o = node_resize_outgoing(node->outgoing,added);
- if(new_o) {
- node->outgoing = new_o;
- acho_2range(node,low,high);
- return 1;
- }
- }
return 0;
}
/******************************************************************************