From 65678dbeeabcf51e02ba3cc9ffbd36324a92a971 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrov Date: Mon, 7 Jun 2021 12:19:40 +0000 Subject: New version of the ahocorasick library (#1200) The new version is about 25% faster with -O2 and 45% faster with -O3. No recursion is used (smaller stack size required). Uses less memory (by valgrind info) bigram: - original 1796 allocs, 247864 bytes allocated - new 1232 allocs, 158880 bytes allocated host_match: - original 18038 allocs, 3004576 bytes allocated - new 6861 allocs, 396624 bytes allocated The function ac_automata_search() is thread safe. Optional case-insensitive comparison. Matching at the beginning and at the end of the string is supported. One code file and one header file. --- src/lib/ndpi_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src/lib/ndpi_main.c') diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index a7b736969..2596b03eb 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -2319,6 +2319,13 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs ndpi_str->custom_categories.ipAddresses = ndpi_patricia_new(32 /* IPv4 */); ndpi_str->custom_categories.ipAddresses_shadow = ndpi_patricia_new(32 /* IPv4 */); + if(ndpi_str->host_automa.ac_automa) + ac_automata_feature(ndpi_str->host_automa.ac_automa,AC_FEATURE_LC); + if(ndpi_str->custom_categories.hostnames.ac_automa) + ac_automata_feature(ndpi_str->custom_categories.hostnames.ac_automa,AC_FEATURE_LC); + if(ndpi_str->custom_categories.hostnames_shadow.ac_automa) + ac_automata_feature(ndpi_str->custom_categories.hostnames_shadow.ac_automa,AC_FEATURE_LC); + if((ndpi_str->custom_categories.ipAddresses == NULL) || (ndpi_str->custom_categories.ipAddresses_shadow == NULL)) { NDPI_LOG_ERR(ndpi_str, "[NDPI] Error allocating Patricia trees\n"); return(NULL); @@ -2441,6 +2448,7 @@ int ndpi_match_string(void *_automa, char *string_to_match) { return(-2); ac_input_text.astring = string_to_match, ac_input_text.length = strlen(string_to_match); + ac_input_text.ignore_case = 0; rc = ac_automata_search(automa, &ac_input_text, &match); /* @@ -2470,6 +2478,7 @@ int ndpi_match_string_protocol_id(void *_automa, char *string_to_match, return(-2); ac_input_text.astring = string_to_match, ac_input_text.length = match_len; + ac_input_text.ignore_case = 0; rc = ac_automata_search(automa, &ac_input_text, &match); /* @@ -2503,6 +2512,7 @@ int ndpi_match_string_value(void *_automa, char *string_to_match, return(-2); ac_input_text.astring = string_to_match, ac_input_text.length = match_len; + ac_input_text.ignore_case = 0; rc = ac_automata_search(automa, &ac_input_text, &match); /* @@ -3020,8 +3030,11 @@ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, con static int ndpi_load_risky_domain(struct ndpi_detection_module_struct *ndpi_str, char* domain_name) { - if(ndpi_str->risky_domain_automa.ac_automa == NULL) + if(ndpi_str->risky_domain_automa.ac_automa == NULL) { ndpi_str->risky_domain_automa.ac_automa = ac_automata_init(ac_match_handler); + if(ndpi_str->risky_domain_automa.ac_automa) + ac_automata_feature(ndpi_str->risky_domain_automa.ac_automa,AC_FEATURE_LC); + } if(ndpi_str->risky_domain_automa.ac_automa) { char buf[64], *str; @@ -6649,6 +6662,7 @@ int ndpi_match_string_subprotocol(struct ndpi_detection_module_struct *ndpi_str, } ac_input_text.astring = string_to_match, ac_input_text.length = string_to_match_len; + ac_input_text.ignore_case = 0; rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match); /* @@ -6839,6 +6853,7 @@ int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str, } ac_input_text.astring = bigram_to_match, ac_input_text.length = 2; + ac_input_text.ignore_case = 0; rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match); /* @@ -6873,6 +6888,7 @@ int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str, } ac_input_text.astring = trigram_to_match, ac_input_text.length = 3; + ac_input_text.ignore_case = 0; rc = ac_automata_search(((AC_AUTOMATA_t *) automa->ac_automa), &ac_input_text, &match); /* -- cgit v1.2.3