diff options
author | Vitaly Lavrov <vel21ripn@gmail.com> | 2021-07-12 15:39:43 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-12 17:39:43 +0200 |
commit | c418b7110b9385c5c3748c10e198df27ae0f7083 (patch) | |
tree | 046941f8085b48bf27b03cd60bfaee180906af21 /src/lib | |
parent | 78b1295dc18e297c1da53006bde1e0870e278db9 (diff) |
ahocorasick. Code review. Part 2. (#1236)
Simplified the process of adding lines to AC_AUTOMATA_t.
Use the ndpi_string_to_automa() function to add patterns with domain names.
For other cases, use ndpi_add_string_value_to_automa().
ac_automata_feature(ac_automa, AC_FEATURE_LC) allows adding
and comparing data in a case-insensitive manner. To require that a pattern match
only at the end of the line, the "ac_pattern.rep.at_end=1" flag is used.
This eliminated the unnecessary conversions to lowercase and the appending of "$" for
end-of-line matching in domain name patterns.
ac_match_handler() has been renamed ac_domain_match_handler() and has been greatly simplified.
ac_domain_match_handler() looks for the pattern with the highest domain level.
For special cases it is possible to manually specify the domain level.
Added test for checking ambiguous domain names like:
- short.weixin.qq.com is QQ, not Wechat
- instagram.faae1-1.fna.fbcdn.net is Instagram, not Facebook
If you specify a NULL handler when creating the AC_AUTOMATA_t structure,
then a pattern with the maximum length that satisfies the search conditions will be found
(exact match, from the beginning of the string, from the end of the string, or a substring).
Added debugging for ac_automata_search.
To do this, you need to enable debugging globally using ac_automata_enable_debug(1) and
enable debugging in the AC_AUTOMATA_t structure using ac_automata_name(ac_automa, "name", AC_FEATURE_DEBUG).
The search will display "name" and a list of matching patterns.
Running "AHO_DEBUG=1 ndpiReader ..." will show the strings that were searched for patterns
and which patterns were found.
The ac_automata_dump() prototype has been changed. Now it outputs data to a file.
If the file is specified as NULL, then the output will be directed to stdout.
If you need to get data as a string, then use open_memstream().
Added the ability to run individual tests via the do.sh script
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/ndpi_content_match.c.inc | 3 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 414 | ||||
-rw-r--r-- | src/lib/ndpi_utils.c | 16 | ||||
-rw-r--r-- | src/lib/third_party/include/ahocorasick.h | 32 | ||||
-rw-r--r-- | src/lib/third_party/src/ahocorasick.c | 135 |
5 files changed, 309 insertions, 291 deletions
diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc index 723f4b21c..8682418f1 100644 --- a/src/lib/ndpi_content_match.c.inc +++ b/src/lib/ndpi_content_match.c.inc @@ -8852,7 +8852,7 @@ static ndpi_protocol_match host_match[] = /* Detected "instagram.c10r.facebook.com". Omitted "*amazonaws.com" and "*facebook.com" CDNs e.g. "ig-telegraph-shv-04-frc3.facebook.com" */ { ".instagram.", "Instagram", NDPI_PROTOCOL_INSTAGRAM, NDPI_PROTOCOL_CATEGORY_SOCIAL_NETWORK, NDPI_PROTOCOL_FUN }, - { "instagram.", "Instagram", NDPI_PROTOCOL_INSTAGRAM, NDPI_PROTOCOL_CATEGORY_SOCIAL_NETWORK, NDPI_PROTOCOL_FUN }, + { "instagram.", "Instagram", NDPI_PROTOCOL_INSTAGRAM, NDPI_PROTOCOL_CATEGORY_SOCIAL_NETWORK, NDPI_PROTOCOL_FUN, 15 }, { ".cdninstagram.com", "Instagram", NDPI_PROTOCOL_INSTAGRAM, NDPI_PROTOCOL_CATEGORY_SOCIAL_NETWORK, NDPI_PROTOCOL_FUN }, { "igcdn-photos-", "Instagram", NDPI_PROTOCOL_INSTAGRAM, NDPI_PROTOCOL_CATEGORY_SOCIAL_NETWORK, NDPI_PROTOCOL_FUN }, @@ -9020,6 +9020,7 @@ static ndpi_protocol_match host_match[] = { ".gmail.", "GMail", NDPI_PROTOCOL_GMAIL, NDPI_PROTOCOL_CATEGORY_MAIL, NDPI_PROTOCOL_ACCEPTABLE }, { "mail.google.", "GMail", NDPI_PROTOCOL_GMAIL, NDPI_PROTOCOL_CATEGORY_MAIL, NDPI_PROTOCOL_ACCEPTABLE }, + { "google.com", "Google", NDPI_PROTOCOL_GOOGLE, NDPI_PROTOCOL_CATEGORY_WEB, NDPI_PROTOCOL_SAFE }, { "google.", "Google", NDPI_PROTOCOL_GOOGLE, NDPI_PROTOCOL_CATEGORY_WEB, NDPI_PROTOCOL_SAFE }, { ".google.", "Google", NDPI_PROTOCOL_GOOGLE, NDPI_PROTOCOL_CATEGORY_WEB, NDPI_PROTOCOL_SAFE }, { ".gstatic.com", "Google", NDPI_PROTOCOL_GOOGLE, NDPI_PROTOCOL_CATEGORY_WEB, NDPI_PROTOCOL_SAFE }, diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 91dacd001..a1c48d781 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -542,7 +542,6 @@ static u_int8_t ndpi_is_middle_string_char(char c) { switch(c) { case '.': case '-': - case '$': /* Do not add a double $$ */ return(1); break; @@ -551,59 +550,85 @@ static u_int8_t 
ndpi_is_middle_string_char(char c) { } } +/*******************************************************/ + +static const u_int8_t ndpi_domain_level_automat[4][4]= { + /* symbol,'.','-',inc */ + { 2,1,2,0 }, // start state + { 2,0,0,0 }, // first char is '.'; disable .. or .- + { 2,3,2,0 }, // part of domain name + { 2,0,0,1 } // next level domain name; disable .. or .- +}; + +/* + * domain level + * a. = 1 + * .a. = 1 + * a.b = 2 + */ + +static u_int8_t ndpi_domain_level(const char *name) { + u_int8_t level = 1, state = 0; + char c; + while((c = *name++) != '\0') { + c = c == '-' ? 2 : (c == '.' ? 1:0); + level += ndpi_domain_level_automat[state][3]; + state = ndpi_domain_level_automat[state][(uint8_t)c]; + if(!state) break; + } + return state >= 2 ? level:0; +} + /* ****************************************************** */ static int ndpi_string_to_automa(struct ndpi_detection_module_struct *ndpi_str, - ndpi_automa *automa, char *value, + AC_AUTOMATA_t *ac_automa, const char *value, u_int16_t protocol_id, ndpi_protocol_category_t category, - ndpi_protocol_breed_t breed, - u_int8_t free_str_on_duplicate, u_int8_t add_ends_with) { + ndpi_protocol_breed_t breed, uint8_t level, + u_int8_t add_ends_with) { AC_PATTERN_t ac_pattern; AC_ERROR_t rc; - char buf[96]; - u_int len, dot; + u_int len; + char *value_dup = NULL; if(protocol_id >= (NDPI_MAX_SUPPORTED_PROTOCOLS + NDPI_MAX_NUM_CUSTOM_PROTOCOLS)) { NDPI_LOG_ERR(ndpi_str, "[NDPI] protoId=%d: INTERNAL ERROR\n", protocol_id); return(-1); } - if((automa->ac_automa == NULL) || (value == NULL) || !*value) + if((ac_automa == NULL) || (value == NULL) || !*value) return(-2); - len = strlen(value); - dot = len -1; + value_dup = ndpi_strdup(value); + if(!value_dup) + return(-1); memset(&ac_pattern, 0, sizeof(ac_pattern)); - if((!add_ends_with) || ndpi_is_middle_string_char(value[dot])) { - ac_pattern.length = len; - ac_pattern.astring = value; - } else { - u_int mlen = sizeof(buf)-2; - - len = ndpi_min(len, mlen); - ac_pattern.length 
= snprintf(buf, mlen, "%s$", value); - ndpi_free(value); - value = ndpi_strdup(buf); - ac_pattern.astring = value; - } + len = strlen(value); - ac_pattern.rep.number = protocol_id, ac_pattern.rep.category = (u_int16_t) category, - ac_pattern.rep.breed = (u_int16_t) breed; + ac_pattern.astring = value_dup; + ac_pattern.length = len; + ac_pattern.rep.number = protocol_id; + ac_pattern.rep.category = (u_int16_t) category; + ac_pattern.rep.breed = (u_int16_t) breed; + ac_pattern.rep.level = level ? level : ndpi_domain_level(value); + ac_pattern.rep.at_end = add_ends_with && !ndpi_is_middle_string_char(value[len-1]); /* len != 0 */ + ac_pattern.rep.dot = memchr(value,'.',len) != NULL; #ifdef MATCH_DEBUG - printf("Adding to automa [%s][protocol_id: %u][category: %u][breed: %u]\n", - ac_pattern.astring, protocol_id, category, breed); + printf("Adding to %s %lx [%s%s][protocol_id: %u][category: %u][breed: %u][level: %u]\n", + ac_automa->name,(unsigned long int)ac_automa, + ac_pattern.astring,ac_pattern.rep.at_end? 
"$":"", protocol_id, category, breed,ac_pattern.rep.level); #endif - rc = ac_automata_add(((AC_AUTOMATA_t *) automa->ac_automa), &ac_pattern); - - if((rc != ACERR_DUPLICATE_PATTERN) && (rc != ACERR_SUCCESS)) - return(-2); + rc = ac_automata_add(ac_automa, &ac_pattern); - if((rc == ACERR_DUPLICATE_PATTERN) && free_str_on_duplicate) - ndpi_free(value); + if(rc != ACERR_SUCCESS) { + ndpi_free(value_dup); + if(rc != ACERR_DUPLICATE_PATTERN) + return (-2); + } return(0); } @@ -611,25 +636,16 @@ static int ndpi_string_to_automa(struct ndpi_detection_module_struct *ndpi_str, /* ****************************************************** */ static int ndpi_add_host_url_subprotocol(struct ndpi_detection_module_struct *ndpi_str, - char *_value, int protocol_id, + char *value, int protocol_id, ndpi_protocol_category_t category, - ndpi_protocol_breed_t breed) { - int rv; - char *value = ndpi_strdup(_value); - - if(!value) - return(-1); - -#ifdef DEBUG + ndpi_protocol_breed_t breed, uint8_t level) { +#ifndef DEBUG NDPI_LOG_DBG2(ndpi_str, "[NDPI] Adding [%s][%d]\n", value, protocol_id); #endif - rv = ndpi_string_to_automa(ndpi_str, &ndpi_str->host_automa, value, protocol_id, category, breed, 1, 1); - - if(rv != 0) - ndpi_free(value); + return ndpi_string_to_automa(ndpi_str, (AC_AUTOMATA_t *)ndpi_str->host_automa.ac_automa, + value, protocol_id, category, breed, level, 1); - return(rv); } /* ****************************************************** */ @@ -668,7 +684,7 @@ void ndpi_init_protocol_match(struct ndpi_detection_module_struct *ndpi_str, ndpi_add_host_url_subprotocol(ndpi_str, match->string_to_match, match->protocol_id, match->protocol_category, - match->protocol_breed); + match->protocol_breed, match->level); } /* ******************************************************************** */ @@ -1732,75 +1748,57 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp /* ****************************************************** */ -static int 
ac_match_handler(AC_MATCH_t *m, AC_TEXT_t *txt, AC_REP_t *match) { - int min_len = (txt->length < m->patterns->length) ? txt->length : m->patterns->length; - char buf[64] = {'\0'}, *whatfound; - int min_buf_len = (txt->length > 63 /* sizeof(buf)-1 */) ? 63 : txt->length; - u_int buf_len = strlen(buf); - - strncpy(buf, txt->astring, min_buf_len); - buf[min_buf_len] = '\0'; +#define MATCH_DEBUG_INFO(fmt, ...) if(txt->option & AC_FEATURE_DEBUG) printf(fmt, ##__VA_ARGS__) -#ifdef MATCH_DEBUG - printf("Searching [to search: %s/%u][pattern: %s/%u] [len: %d][match_num: %u][%s]\n", buf, - (unsigned int) txt->length, m->patterns->astring, - (unsigned int) m->patterns->length, min_len, m->match_num, - m->patterns->astring); -#endif - - whatfound = strstr(buf, m->patterns->astring); - -#ifdef MATCH_DEBUG - printf("[NDPI] %s() [searching=%s][pattern=%s][%s][%c]\n", __FUNCTION__, buf, m->patterns->astring, - whatfound ? whatfound : "<NULL>", whatfound[-1]); -#endif +static int ac_domain_match_handler(AC_MATCH_t *m, AC_TEXT_t *txt, AC_REP_t *match) { + AC_PATTERN_t *pattern = m->patterns; + int i,start,end = m->position; - if(whatfound) { + for(i=0; i < m->match_num; i++,pattern++) { /* - The patch below allows in case of pattern ws.amazon.com - to avoid matching aws.amazon.com whereas a.ws.amazon.com - has to match - */ - if((whatfound != buf) - && (strchr(whatfound, '=') == NULL) /* This is not a match from tls_certificate_match[] */ - && (m->patterns->astring[0] != '.') /* The searched pattern does not start with . */ - && strchr(m->patterns->astring, '.') /* The matched pattern has a . (e.g. numeric or sym IPs) */) { - int len = strlen(m->patterns->astring); - - if(((whatfound[-1] != '.') && (whatfound[0] != '-') && (whatfound[-1] != '-')) - || ((m->patterns->astring[len - 1] != '.') - && (whatfound[len] != '\0') /* endsWith does not hold here */)) { - return(0); - } else { - memcpy(match, &m->patterns[0].rep, sizeof(AC_REP_t)); /* Partial match? 
*/ - return(0); /* Keep searching as probably there is a better match */ - } + * See ac_automata_exact_match() + * The bit is set if the pattern exactly matches AND + * the length of the pattern is longer than that of the previous one. + * Skip shorter (less precise) templates. + */ + if(!(m->match_map & (1 << i))) + continue; + start = end - pattern->length; + + MATCH_DEBUG_INFO("[NDPI] Searching: [to search: %.*s/%u][pattern: %s%.*s%s/%u l:%u] %d-%d\n", + txt->length, txt->astring,(unsigned int) txt->length, + m->patterns[0].rep.from_start ? "^":"", + (unsigned int) pattern->length, pattern->astring, + m->patterns[0].rep.at_end ? "$":"", (unsigned int) pattern->length,m->patterns[0].rep.level, + start,end); + + if(start == 0 && end == txt->length) { + *match = pattern->rep; txt->match.last = pattern; + MATCH_DEBUG_INFO("[NDPI] Searching: Found exact match. Proto %d \n",pattern->rep.number); + return 1; + } + /* pattern is DOMAIN.NAME and string x.DOMAIN.NAME ? */ + if(start > 1 && !ndpi_is_middle_string_char(pattern->astring[0]) && pattern->rep.dot) { + /* + The patch below allows in case of pattern ws.amazon.com + to avoid matching aws.amazon.com whereas a.ws.amazon.com + has to match + */ + if(ndpi_is_middle_string_char(txt->astring[start-1])) { + if(!txt->match.last || txt->match.last->rep.level < pattern->rep.level) { + txt->match.last = pattern; *match = pattern->rep; + MATCH_DEBUG_INFO("[NDPI] Searching: Found domain match. Proto %d \n",pattern->rep.number); + } + } + continue; } - } - - /* - Return 1 for stopping to the first match. - We might consider searching for the more - specific match, paying more cpu cycles. 
- */ - memcpy(match, &m->patterns[0].rep, sizeof(AC_REP_t)); - if(((buf_len >= min_len) && (strncmp(&buf[buf_len - min_len], m->patterns->astring, min_len) == 0)) || - (strncmp(buf, m->patterns->astring, min_len) == 0) /* begins with */ - ) { -#ifdef MATCH_DEBUG - printf("Found match [%s][%s] [len: %d]" - // "[proto_id: %u]" - "\n", - buf, m->patterns->astring, min_len /* , *matching_protocol_id */); -#endif - return(1); /* If the pattern found matches the string at the beginning we stop here */ - } else { -#ifdef MATCH_DEBUG - printf("NO match found: continue\n"); -#endif - return(0); /* 0 to continue searching, !0 to stop */ + if(!txt->match.last || txt->match.last->rep.level < pattern->rep.level) { + txt->match.last = pattern; *match = pattern->rep; + MATCH_DEBUG_INFO("[NDPI] Searching: matched. Proto %d \n",pattern->rep.number); + } } + return 0; } /* ******************************************************************** */ @@ -2335,9 +2333,9 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs ndpi_str->ndpi_num_supported_protocols = NDPI_MAX_SUPPORTED_PROTOCOLS; ndpi_str->ndpi_num_custom_protocols = 0; - ndpi_str->host_automa.ac_automa = ac_automata_init(ac_match_handler); - ndpi_str->content_automa.ac_automa = ac_automata_init(ac_match_handler); - ndpi_str->tls_cert_subject_automa.ac_automa = ac_automata_init(ac_match_handler); + ndpi_str->host_automa.ac_automa = ac_automata_init(ac_domain_match_handler); + ndpi_str->content_automa.ac_automa = ac_automata_init(ac_domain_match_handler); + ndpi_str->tls_cert_subject_automa.ac_automa = ac_automata_init(NULL); ndpi_str->malicious_ja3_automa.ac_automa = NULL; /* Initialized on demand */ ndpi_str->malicious_sha1_automa.ac_automa = NULL; /* Initialized on demand */ ndpi_str->risky_domain_automa.ac_automa = NULL; /* Initialized on demand */ @@ -2348,19 +2346,41 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs return(NULL); } - 
ndpi_str->custom_categories.hostnames.ac_automa = ac_automata_init(ac_match_handler); - ndpi_str->custom_categories.hostnames_shadow.ac_automa = ac_automata_init(ac_match_handler); + ndpi_str->custom_categories.hostnames.ac_automa = ac_automata_init(ac_domain_match_handler); + ndpi_str->custom_categories.hostnames_shadow.ac_automa = ac_automata_init(ac_domain_match_handler); ndpi_str->custom_categories.ipAddresses = ndpi_patricia_new(32 /* IPv4 */); ndpi_str->custom_categories.ipAddresses_shadow = ndpi_patricia_new(32 /* IPv4 */); - if(ndpi_str->host_automa.ac_automa) + if(ndpi_str->host_automa.ac_automa) ac_automata_feature(ndpi_str->host_automa.ac_automa,AC_FEATURE_LC); + if(ndpi_str->custom_categories.hostnames.ac_automa) ac_automata_feature(ndpi_str->custom_categories.hostnames.ac_automa,AC_FEATURE_LC); + if(ndpi_str->custom_categories.hostnames_shadow.ac_automa) ac_automata_feature(ndpi_str->custom_categories.hostnames_shadow.ac_automa,AC_FEATURE_LC); + if(ndpi_str->tls_cert_subject_automa.ac_automa) + ac_automata_feature(ndpi_str->tls_cert_subject_automa.ac_automa,AC_FEATURE_LC); + + if(ndpi_str->content_automa.ac_automa) + ac_automata_feature(ndpi_str->content_automa.ac_automa,AC_FEATURE_LC); + + /* ahocorasick debug */ + /* Needed ac_automata_enable_debug(1) for show debug */ + if(ndpi_str->host_automa.ac_automa) + ac_automata_name(ndpi_str->host_automa.ac_automa,"host",AC_FEATURE_DEBUG); + if(ndpi_str->custom_categories.hostnames.ac_automa) + ac_automata_name(ndpi_str->custom_categories.hostnames.ac_automa,"ccat",0); + if(ndpi_str->custom_categories.hostnames_shadow.ac_automa) + ac_automata_name(ndpi_str->custom_categories.hostnames_shadow.ac_automa,"ccat_sh",0); + if(ndpi_str->tls_cert_subject_automa.ac_automa) + ac_automata_name(ndpi_str->tls_cert_subject_automa.ac_automa,"tls_cert",AC_FEATURE_DEBUG); + if(ndpi_str->content_automa.ac_automa) + ac_automata_name(ndpi_str->content_automa.ac_automa,"content",AC_FEATURE_DEBUG); + + 
if((ndpi_str->custom_categories.ipAddresses == NULL) || (ndpi_str->custom_categories.ipAddresses_shadow == NULL)) { NDPI_LOG_ERR(ndpi_str, "[NDPI] Error allocating Patricia trees\n"); return(NULL); @@ -2420,7 +2440,7 @@ void ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str) /* Wrappers */ void *ndpi_init_automa(void) { - return(ac_automata_init(ac_match_handler)); + return(ac_automata_init(ac_domain_match_handler)); } /* ****************************************************** */ @@ -2463,7 +2483,7 @@ void ndpi_finalize_automa(void *_automa) { /* ****************************************************** */ static int ndpi_match_string_common(AC_AUTOMATA_t *automa, char *string_to_match,size_t string_len, - u_int16_t *protocol_id, ndpi_protocol_category_t *category, + u_int32_t *protocol_id, ndpi_protocol_category_t *category, ndpi_protocol_breed_t *breed) { AC_REP_t match = { NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED }; AC_TEXT_t ac_input_text; @@ -2481,17 +2501,9 @@ static int ndpi_match_string_common(AC_AUTOMATA_t *automa, char *string_to_match } ac_input_text.astring = string_to_match, ac_input_text.length = string_len; - ac_input_text.ignore_case = 0; + ac_input_text.option = 0; rc = ac_automata_search(automa, &ac_input_text, &match); - /* - As ac_automata_search can detect partial matches and continue the search process - in case rc == 0 (i.e. no match), we need to check if there is a partial match - and in this case return it - */ - if((rc == 0) && (match.number != 0)) - rc = 1; - if(protocol_id) *protocol_id = rc ? 
match.number : NDPI_PROTOCOL_UNKNOWN; @@ -2507,7 +2519,7 @@ static int ndpi_match_string_common(AC_AUTOMATA_t *automa, char *string_to_match /* ****************************************************** */ int ndpi_match_string(void *_automa, char *string_to_match) { - uint16_t proto_id; + uint32_t proto_id; int rc; if(!string_to_match) @@ -2526,58 +2538,32 @@ int ndpi_match_string_protocol_id(void *automa, char *string_to_match, u_int match_len, u_int16_t *protocol_id, ndpi_protocol_category_t *category, ndpi_protocol_breed_t *breed) { - + u_int32_t proto_id; int rc = ndpi_match_string_common((AC_AUTOMATA_t*)automa, string_to_match, - match_len, protocol_id, category, breed); + match_len, &proto_id, category, breed); if(rc < 0) return rc; - - return(*protocol_id != NDPI_PROTOCOL_UNKNOWN ? 0 : -1); + *protocol_id = (u_int16_t)proto_id; + return(proto_id != NDPI_PROTOCOL_UNKNOWN ? 0 : -1); } /* ****************************************************** */ -int ndpi_match_string_value(void *_automa, char *string_to_match, - u_int match_len, u_int32_t *num) { - AC_REP_t match = { NDPI_PROTOCOL_UNKNOWN, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED }; - AC_TEXT_t ac_input_text; - int rc; - AC_AUTOMATA_t *automa = (AC_AUTOMATA_t*)_automa; - - if(num) *num = 0; - - if((automa == NULL) || (string_to_match == NULL) || (string_to_match[0] == '\0')) { - return(-2); - } - - if(automa->automata_open) { - printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initialization()\n", __FILE__, __LINE__); - return(-1); - } - - ac_input_text.astring = string_to_match, ac_input_text.length = match_len; - ac_input_text.ignore_case = 0; - rc = ac_automata_search(automa, &ac_input_text, &match); - - /* - As ac_automata_search can detect partial matches and continue the search process - in case rc == 0 (i.e. 
no match), we need to check if there is a partial match - and in this case return it - */ - if((rc == 0) && (match.number != 0)) - rc = 1; - - if(num && rc) - *num = match.number; +int ndpi_match_string_value(void *automa, char *string_to_match, + u_int match_len, u_int32_t *num) { + int rc = ndpi_match_string_common((AC_AUTOMATA_t *)automa, string_to_match, + match_len, num, NULL, NULL); + if(rc < 0) return rc; return rc ? 0 : -1; -} + } + /* *********************************************** */ int ndpi_match_custom_category(struct ndpi_detection_module_struct *ndpi_str, char *name, u_int name_len, ndpi_protocol_category_t *category) { - u_int16_t id; + u_int32_t id; int rc = ndpi_match_string_common(ndpi_str->custom_categories.hostnames.ac_automa, name, name_len, &id, category, NULL); if(rc < 0) return rc; @@ -2993,7 +2979,7 @@ int ndpi_handle_rule(struct ndpi_detection_module_struct *ndpi_str, char *rule, } else { if(do_add) ndpi_add_host_url_subprotocol(ndpi_str, value, subprotocol_id, NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, - NDPI_PROTOCOL_ACCEPTABLE); + NDPI_PROTOCOL_ACCEPTABLE,0); else ndpi_remove_host_url_subprotocol(ndpi_str, value, subprotocol_id); } @@ -3062,28 +3048,17 @@ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, con static int ndpi_load_risky_domain(struct ndpi_detection_module_struct *ndpi_str, char* domain_name) { if(ndpi_str->risky_domain_automa.ac_automa == NULL) { - ndpi_str->risky_domain_automa.ac_automa = ac_automata_init(ac_match_handler); - if(ndpi_str->risky_domain_automa.ac_automa) - ac_automata_feature(ndpi_str->risky_domain_automa.ac_automa,AC_FEATURE_LC); + ndpi_str->risky_domain_automa.ac_automa = ac_automata_init(ac_domain_match_handler); + if(!ndpi_str->risky_domain_automa.ac_automa) return -1; + ac_automata_feature(ndpi_str->risky_domain_automa.ac_automa,AC_FEATURE_LC); + ac_automata_name(ndpi_str->risky_domain_automa.ac_automa,"risky",0); } - if(ndpi_str->risky_domain_automa.ac_automa) { - char buf[64], 
*str; - u_int i, len; - - snprintf(buf, sizeof(buf)-1, "%s$", domain_name); - for(i = 0, len = strlen(buf)-1 /* Skip $ */; i < len; i++) buf[i] = tolower(buf[i]); - - str = ndpi_strdup(buf); - if (str == NULL) { - NDPI_LOG_ERR(ndpi_str, "Memory allocation failure\n"); - return -1; - }; - - return(ndpi_add_string_to_automa(ndpi_str->risky_domain_automa.ac_automa, str)); - } + if(!ndpi_str->risky_domain_automa.ac_automa) + return -1; - return(-1); + return ndpi_string_to_automa(ndpi_str, (AC_AUTOMATA_t *)ndpi_str->risky_domain_automa.ac_automa, + domain_name, 1, 0, 0, 0, 1); /* domain, protocol, category, breed, level , at_end */ } /* ******************************************************************** */ @@ -3147,7 +3122,9 @@ int ndpi_load_malicious_ja3_file(struct ndpi_detection_module_struct *ndpi_str, int len, num = 0; if(ndpi_str->malicious_ja3_automa.ac_automa == NULL) - ndpi_str->malicious_ja3_automa.ac_automa = ac_automata_init(ac_match_handler); + ndpi_str->malicious_ja3_automa.ac_automa = ac_automata_init(NULL); + if(ndpi_str->malicious_ja3_automa.ac_automa) + ac_automata_name(ndpi_str->malicious_ja3_automa.ac_automa,"ja3",0); fd = fopen(path, "r"); @@ -3208,7 +3185,9 @@ int ndpi_load_malicious_sha1_file(struct ndpi_detection_module_struct *ndpi_str, int num = 0; if (ndpi_str->malicious_sha1_automa.ac_automa == NULL) - ndpi_str->malicious_sha1_automa.ac_automa = ac_automata_init(ac_match_handler); + ndpi_str->malicious_sha1_automa.ac_automa = ac_automata_init(NULL); + if(ndpi_str->malicious_sha1_automa.ac_automa) + ac_automata_name(ndpi_str->malicious_sha1_automa.ac_automa,"sha1",0); fd = fopen(path, "r"); @@ -4826,45 +4805,15 @@ int ndpi_load_ip_category(struct ndpi_detection_module_struct *ndpi_str, const c int ndpi_load_hostname_category(struct ndpi_detection_module_struct *ndpi_str, const char *name_to_add, ndpi_protocol_category_t category) { - char *name; - u_int len; - AC_PATTERN_t ac_pattern; - AC_ERROR_t rc; - if(name_to_add == NULL) - return(-1); 
- else - len = strlen(name_to_add); - - if((name = (char*)ndpi_malloc(len+3)) == NULL) + if(ndpi_str->custom_categories.hostnames_shadow.ac_automa == NULL) return(-1); - memset(&ac_pattern, 0, sizeof(ac_pattern)); - ac_pattern.length = snprintf(name, len+2, "%s%s", name_to_add, - ndpi_is_middle_string_char(name_to_add[len-1]) ? "" : "$"); - -#if 0 - printf("===> %s() Loading %s as %u\n", __FUNCTION__, name, category); -#endif - - if(ndpi_str->custom_categories.hostnames_shadow.ac_automa == NULL) { - ndpi_free(name); - return(-1); - } - - ac_pattern.astring = name; - ac_pattern.rep.number = (u_int32_t) category, ac_pattern.rep.category = category; - - rc = ac_automata_add(ndpi_str->custom_categories.hostnames_shadow.ac_automa, &ac_pattern); - if(rc != ACERR_DUPLICATE_PATTERN && rc != ACERR_SUCCESS) { - ndpi_free(name); + if(name_to_add == NULL) return(-1); - } - - if(rc == ACERR_DUPLICATE_PATTERN) - ndpi_free(name); - return(0); + return ndpi_string_to_automa(ndpi_str,(AC_AUTOMATA_t *)ndpi_str->custom_categories.hostnames_shadow.ac_automa, + name_to_add,category,category, 0, 0, 1); /* at_end */ } /* ********************************************************************************* */ @@ -4905,7 +4854,11 @@ int ndpi_enable_loaded_categories(struct ndpi_detection_module_struct *ndpi_str) ndpi_str->custom_categories.hostnames.ac_automa = ndpi_str->custom_categories.hostnames_shadow.ac_automa; /* Realloc */ - ndpi_str->custom_categories.hostnames_shadow.ac_automa = ac_automata_init(ac_match_handler); + ndpi_str->custom_categories.hostnames_shadow.ac_automa = ac_automata_init(ac_domain_match_handler); + if(ndpi_str->custom_categories.hostnames_shadow.ac_automa) { + ac_automata_feature(ndpi_str->custom_categories.hostnames_shadow.ac_automa,AC_FEATURE_LC); + ac_automata_name(ndpi_str->custom_categories.hostnames_shadow.ac_automa,"ccat_sh",0); + } if(ndpi_str->custom_categories.ipAddresses != NULL) ndpi_patricia_destroy((ndpi_patricia_tree_t *) 
ndpi_str->custom_categories.ipAddresses, free_ptree_data); @@ -6779,20 +6732,14 @@ u_int16_t ndpi_match_host_subprotocol(struct ndpi_detection_module_struct *ndpi_ char *string_to_match, u_int string_to_match_len, ndpi_protocol_match_result *ret_match, u_int16_t master_protocol_id) { - u_int16_t rc, buf_len, i; + u_int16_t rc; ndpi_protocol_category_t id; - char buf[96]; - - buf_len = ndpi_min(string_to_match_len, sizeof(buf)-2); - for(i=0; i<buf_len; i++) buf[i] = tolower(string_to_match[i]); - buf[i++] = '$'; /* Add trailer $ */ - buf[i] = '\0'; - rc = ndpi_automa_match_string_subprotocol(ndpi_str, flow, buf, i, + rc = ndpi_automa_match_string_subprotocol(ndpi_str, flow, string_to_match, string_to_match_len, master_protocol_id, ret_match, 1); id = ret_match->protocol_category; - if(ndpi_get_custom_category_match(ndpi_str, buf, i, &id) != -1) { + if(ndpi_get_custom_category_match(ndpi_str, string_to_match, string_to_match_len, &id) != -1) { /* if(id != -1) */ { flow->category = ret_match->protocol_category = id; rc = master_protocol_id; @@ -6800,8 +6747,9 @@ u_int16_t ndpi_match_host_subprotocol(struct ndpi_detection_module_struct *ndpi_ } if(ndpi_str->risky_domain_automa.ac_automa != NULL) { - u_int16_t rc1 = ndpi_match_string(ndpi_str->risky_domain_automa.ac_automa, buf); - + u_int32_t proto_id; + u_int16_t rc1 = ndpi_match_string_common(ndpi_str->risky_domain_automa.ac_automa, + string_to_match,string_to_match_len, &proto_id, NULL, NULL); if(rc1 > 0) ndpi_set_risk(flow, NDPI_RISKY_DOMAIN); } diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c index 9814733f7..95f0a4345 100644 --- a/src/lib/ndpi_utils.c +++ b/src/lib/ndpi_utils.c @@ -731,7 +731,7 @@ const char* ndpi_cipher2str(u_int32_t cipher) { /* ******************************************************************** */ -static int ndpi_is_other_char(char c) { +static inline int ndpi_is_other_char(char c) { return((c == '.') || (c == ' ') || (c == '@') @@ -741,7 +741,7 @@ static int ndpi_is_other_char(char 
c) { /* ******************************************************************** */ -static int ndpi_is_valid_char(char c) { +static int _ndpi_is_valid_char(char c) { if(ispunct(c) && (!ndpi_is_other_char(c))) return(0); else @@ -749,6 +749,18 @@ static int ndpi_is_valid_char(char c) { || isalpha(c) || ndpi_is_other_char(c)); } +static char ndpi_is_valid_char_tbl[256],ndpi_is_valid_char_tbl_init=0; + +static void _ndpi_is_valid_char_init(void) { + int c; + for(c=0; c < 256; c++) ndpi_is_valid_char_tbl[c] = _ndpi_is_valid_char(c); + ndpi_is_valid_char_tbl_init = 1; +} +static inline int ndpi_is_valid_char(char c) { + if(!ndpi_is_valid_char_tbl_init) + _ndpi_is_valid_char_init(); + return ndpi_is_valid_char_tbl[(unsigned char)c]; +} /* ******************************************************************** */ diff --git a/src/lib/third_party/include/ahocorasick.h b/src/lib/third_party/include/ahocorasick.h index 71fc22d0d..5efbc05f2 100644 --- a/src/lib/third_party/include/ahocorasick.h +++ b/src/lib/third_party/include/ahocorasick.h @@ -54,8 +54,11 @@ typedef char AC_ALPHABET_t; **/ typedef struct { uint32_t number; /* Often used to store procotolId */ - uint16_t breed, - category:14,from_start:1,at_end:1; + uint16_t breed, category; + uint16_t level, /* Domain level for comparison */ + from_start:1, /* match from start of string */ + at_end:1, /* match at end of string */ + dot:1; /* is domain name */ } AC_REP_t; /* AC_PATTERN_t: @@ -103,8 +106,10 @@ typedef struct { typedef struct { - AC_PATTERN_t *matched[4]; /* for ac_automata_exact_match() */ - AC_PATTERN_t *patterns; /* Array of matched pattern */ + AC_PATTERN_t *matched[4], /* for ac_automata_exact_match() */ + *last; /* for callback */ + AC_PATTERN_t *patterns; /* Array of matched pattern */ + unsigned int match_map; /* Matched patterns (bitmap) */ unsigned int position; /* The end position of matching pattern(s) in the text */ unsigned short int match_num; /* Number of matched patterns */ unsigned short int 
match_counter; /* Counter of found matches */ @@ -120,7 +125,7 @@ typedef struct AC_MATCH_t match; AC_ALPHABET_t * astring; /* String of alphabets */ unsigned short int length, /* Length of string */ - ignore_case; + option; /* AC_FEATURE_LC | AC_FEATURE_DEBUG */; } AC_TEXT_t; @@ -218,7 +223,7 @@ typedef struct * means not finalized (is open). after finalizing automata you can not * add pattern to automata anymore. */ unsigned short automata_open, - to_lc:1, no_root_range:1; /* lowercase match */ + to_lc:1, no_root_range:1,debug:1; /* lowercase match */ /* Statistic Variables */ unsigned long total_patterns; /* Total patterns in the automata */ @@ -229,17 +234,20 @@ typedef struct int id; /* node id */ int add_to_range; /* for convert to range */ int n_oc,n_range,n_find; /* statistics */ + char name[32]; /* if debug != 0 */ } AC_AUTOMATA_t; typedef AC_ERROR_t (*NODE_CALLBACK_f)(AC_AUTOMATA_t *, AC_NODE_t *,int idx, void *); typedef void (*ALPHA_CALLBACK_f)(AC_AUTOMATA_t *, AC_NODE_t *,AC_NODE_t *,int ,void *); -#define AC_FEATURE_LC 1 -#define AC_FEATURE_NO_ROOT_RANGE 2 +#define AC_FEATURE_DEBUG 1 +#define AC_FEATURE_LC 2 +#define AC_FEATURE_NO_ROOT_RANGE 4 AC_AUTOMATA_t * ac_automata_init (MATCH_CALLBACK_f mc); AC_ERROR_t ac_automata_feature (AC_AUTOMATA_t * thiz, unsigned int feature); +AC_ERROR_t ac_automata_name (AC_AUTOMATA_t * thiz, char *name, int debug); AC_ERROR_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * str); AC_ERROR_t ac_automata_finalize (AC_AUTOMATA_t * thiz); AC_ERROR_t ac_automata_walk (AC_AUTOMATA_t * thiz, NODE_CALLBACK_f node_cb, @@ -252,7 +260,9 @@ int ac_automata_exact_match(AC_PATTERNS_t *mp,int pos, AC_TEXT_t *); void ac_automata_clean (AC_AUTOMATA_t * thiz); void ac_automata_release (AC_AUTOMATA_t * thiz, uint8_t free_pattern); #ifndef __KERNEL__ -void ac_automata_dump (AC_AUTOMATA_t * thiz, - char *buf, size_t bufsize, char repcast); +/* Global debug control. 
*/ +void ac_automata_enable_debug (int debug); +/* See man open_memstream() for get result as string */ +void ac_automata_dump (AC_AUTOMATA_t * thiz, FILE *); +#endif #endif -#endif diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c index ab9c5d333..06ed56a27 100644 --- a/src/lib/third_party/src/ahocorasick.c +++ b/src/lib/third_party/src/ahocorasick.c @@ -69,9 +69,11 @@ struct aho_dump_info { int buf_pos,ip; char *bufstr; size_t bufstr_len; + FILE *file; }; static void dump_node_header(AC_NODE_t * n, struct aho_dump_info *); +static int ac_automata_global_debug = 0; #endif /* Private function prototype */ @@ -195,6 +197,20 @@ AC_ERROR_t ac_automata_feature (AC_AUTOMATA_t * thiz, unsigned int feature) return ACERR_SUCCESS; } +AC_ERROR_t ac_automata_name (AC_AUTOMATA_t * thiz, char *name, int debug) +{ + if(!thiz) return ACERR_ERROR; + strncpy(thiz->name,name,sizeof(thiz->name)-1); + thiz->debug = debug != 0; + return ACERR_SUCCESS; +} + +#ifndef __KERNEL__ +void ac_automata_enable_debug (int debug) { + ac_automata_global_debug = debug != 0; +} +#endif + /****************************************************************************** * FUNCTION: ac_automata_add * Adds pattern to the automata. 
@@ -368,30 +384,28 @@ int ac_automata_exact_match(AC_PATTERNS_t *mp,int pos, AC_TEXT_t *txt) { AC_PATTERN_t *patterns = mp->patterns; AC_PATTERN_t **matched = txt->match.matched; int i; - for(i=0; i < mp->num; i++,patterns++) { + int match_map = 0; + for(i=0; i < mp->num && i < (__SIZEOF_INT__*8-1); i++,patterns++) { do { if(patterns->rep.from_start && patterns->rep.at_end) { if(pos == txt->length && patterns->length == pos) - matched[0] = patterns; + matched[0] = patterns, match_map |= 1 << i; break; } if(patterns->rep.from_start) { - if(patterns->length == pos) - if(!matched[1] || patterns->length > matched[1]->length) - matched[1] = patterns; + if(patterns->length == pos) + matched[1] = patterns, match_map |= 1 << i; break; } if(patterns->rep.at_end) { - if(pos == txt->length) - if(!matched[2] || patterns->length > matched[2]->length) - matched[2] = patterns; + if(pos == txt->length) + matched[2] = patterns, match_map |= 1 << i; break; } - if(!matched[3] || patterns->length > matched[3]->length) - matched[3] = patterns; + matched[3] = patterns, match_map |= 1 << i; } while(0); } - return 0; + return match_map; } /****************************************************************************** @@ -414,7 +428,7 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * txt, AC_REP_t * param) { unsigned long position; - int icase = 0,i; + int icase = 0,i,debug=0; AC_MATCH_t *match; AC_NODE_t *curr; AC_NODE_t *next; @@ -426,14 +440,20 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, position = 0; curr = thiz->root; apos = txt->astring; +#ifndef __KERNEL__ + if(thiz->debug && ac_automata_global_debug) debug = 1; + if(debug) { + txt->option = debug; /* for callback */ + printf("aho %s: search %.*s\n", thiz->name[0] ? 
thiz->name:"unknown", txt->length, apos); + } +#endif match = &txt->match; memset((char*)match,0,sizeof(*match)); - icase = !thiz->to_lc; /* The 'txt->ignore_case' option is checked * separately otherwise clang will detect * uninitialized memory usage much later. */ - if(txt->ignore_case == 1) icase = 1; + if(txt->option & AC_FEATURE_LC) icase = 1; /* This is the main search loop. * it must be keep as lightweight as possible. */ while (position < txt->length) { @@ -448,19 +468,35 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, curr = next; position++; if(curr->final) { - match->match_counter++; /* we have a matching */ /* select best match */ - ac_automata_exact_match(curr->matched_patterns,position,txt); - if(thiz->match_handler) { - /* We check 'next' to find out if we came here after a alphabet - * transition or due to a fail. in second case we should not report - * matching because it was reported in previous node */ - match->position = position; - match->match_num = curr->matched_patterns->num; - match->patterns = curr->matched_patterns->patterns; - if (thiz->match_handler(match, txt, param)) - return 1; - } + match->match_map = ac_automata_exact_match(curr->matched_patterns,position,txt); + if(match->match_map) { + match->match_counter++; /* we have a matching */ +#ifndef __KERNEL__ + if(debug) { + int i; + AC_PATTERN_t *patterns = curr->matched_patterns->patterns; + for(i=0; i < curr->matched_patterns->num; i++) { + if(!(match->match_map & (1 << i))) continue; + printf(" match%d: %c%.*s%c [%u]\n",i+1, + patterns[i].rep.from_start ? '^':' ', + patterns[i].length,patterns[i].astring, + patterns[i].rep.at_end ? '$':' ', + patterns[i].rep.number); + } + } +#endif + if(thiz->match_handler) { + /* We check 'next' to find out if we came here after a alphabet + * transition or due to a fail. 
in second case we should not report + * matching because it was reported in previous node */ + match->position = position; + match->match_num = curr->matched_patterns->num; + match->patterns = curr->matched_patterns->patterns; + if (thiz->match_handler(match, txt, param)) + return 1; + } + } /* match->match_map */ } } } @@ -470,6 +506,16 @@ int ac_automata_search (AC_AUTOMATA_t * thiz, for(i = 0; i < 4; i++) if(txt->match.matched[i]) { *param = (txt->match.matched[i])->rep; +#ifndef __KERNEL__ + if(debug) { + AC_PATTERN_t *pattern = txt->match.matched[i]; + printf("best match: %c%.*s%c [%u]\n", + pattern->rep.from_start ? '^':' ', + pattern->length,pattern->astring, + pattern->rep.at_end ? '$':' ', + pattern->rep.number); + } +#endif return 1; } return 0; @@ -538,26 +584,26 @@ void ac_automata_release (AC_AUTOMATA_t * thiz, uint8_t free_pattern) { static void dump_node_header(AC_NODE_t * n, struct aho_dump_info *ai) { char *c; int i; - printf("%04d: ",n->id); - if(n->failure_node) printf(" failure %04d:",n->failure_node->id); - printf(" d:%d %c",n->depth, n->use ? '+':'-'); + fprintf(ai->file,"%04d: ",n->id); + if(n->failure_node) fprintf(ai->file," failure %04d:",n->failure_node->id); + fprintf(ai->file," d:%d %c",n->depth, n->use ? '+':'-'); ai->memcnt += sizeof(*n); if(n->matched_patterns) { ai->memcnt += sizeof(n->matched_patterns) + n->matched_patterns->max*sizeof(n->matched_patterns->patterns[0]); } - if(!n->use) { printf("\n"); return; } + if(!n->use) { fprintf(ai->file,"\n"); return; } if(n->one) { (ai->node_oc)++; - printf(" '%c' next->%d\n",n->one_alpha, + fprintf(ai->file," '%c' next->%d\n",n->one_alpha, n->outgoing ? ((AC_NODE_t *)n->outgoing)->id : -1); return; } if(!n->outgoing) { - printf(" BUG! !outgoing\n"); + fprintf(ai->file," BUG! !outgoing\n"); return; } - printf("%s\n",n->range ? " RANGE":""); + fprintf(ai->file,"%s\n",n->range ? 
" RANGE":""); c = (char *)edge_get_alpha(n->outgoing); if(n->outgoing->degree <= 8) (ai->node_8c)++; @@ -566,7 +612,7 @@ static void dump_node_header(AC_NODE_t * n, struct aho_dump_info *ai) { if(n->range) (ai->node_xr)++; for(i=0; i < n->outgoing->degree; i++) { - printf(" %d: \"%c\" -> %d\n",i,c[i], + fprintf(ai->file," %d: \"%c\" -> %d\n",i,c[i], n->outgoing->next[i] ? n->outgoing->next[i]->id:-1); } ai->memcnt += sizeof(n->outgoing) + edge_data_size(n->outgoing->max); @@ -580,7 +626,7 @@ static AC_ERROR_t dump_node_common(AC_AUTOMATA_t * thiz, if(idx) return ACERR_SUCCESS; dump_node_header(n,ai); if (n->matched_patterns && n->matched_patterns->num && n->final) { - char lbuf[300]; + char lbuf[512]; int nl = 0,j; nl = snprintf(lbuf,sizeof(lbuf),"'%.100s' N:%d{",rstr,n->matched_patterns->num); @@ -593,7 +639,7 @@ static AC_ERROR_t dump_node_common(AC_AUTOMATA_t * thiz, sid->astring, sid->rep.number & 0x4000 ? '$':' '); } - printf("%s}\n",lbuf); + fprintf(ai->file,"%s}\n",lbuf); } return ACERR_SUCCESS; } @@ -615,22 +661,23 @@ static void dump_node_str(AC_AUTOMATA_t * thiz, AC_NODE_t * node, * char repcast: 'n': print AC_REP_t as number, 's': print AC_REP_t as string ******************************************************************************/ -void ac_automata_dump(AC_AUTOMATA_t * thiz, char *rstr, size_t rstr_size, char repcast) { +void ac_automata_dump(AC_AUTOMATA_t * thiz, FILE *file) { struct aho_dump_info ai; memset((char *)&ai,0,sizeof(ai)); - - printf("---DUMP- all nodes %u - max strlen %u -%s---\n", + ai.file = file ? file : stdout; + fprintf(ai.file,"---DUMP- all nodes %u - max strlen %u -%s---\n", (unsigned int)thiz->all_nodes_num, (unsigned int)thiz->max_str_len, thiz->automata_open ? 
"open":"ready"); - printf("root: %px\n",thiz->root); - *rstr = '\0'; - ai.bufstr = rstr; - ai.bufstr_len = rstr_size; + + ai.bufstr = acho_malloc(AC_PATTRN_MAX_LENGTH+1); + ai.bufstr_len = AC_PATTRN_MAX_LENGTH; + if(!ai.bufstr) return; + ai.bufstr[0] = '\0'; ac_automata_walk(thiz,dump_node_common,dump_node_str,(void *)&ai); - printf("---\n mem size %zu avg node size %d, node one char %d, <=8c %d, >8c %d, range %d\n---DUMP-END-\n", + fprintf(ai.file,"---\n mem size %zu avg node size %d, node one char %d, <=8c %d, >8c %d, range %d\n---DUMP-END-\n", ai.memcnt,(int)ai.memcnt/(thiz->all_nodes_num+1),(int)ai.node_oc,(int)ai.node_8c,(int)ai.node_xc,(int)ai.node_xr); } #endif |