diff options
author | Luca <deri@ntop.org> | 2019-11-07 19:28:16 +0000 |
---|---|---|
committer | Luca <deri@ntop.org> | 2019-11-07 19:28:16 +0000 |
commit | 0558d641f2230795ef856e5e1e5c77050becb932 (patch) | |
tree | 83be952091b9c14d73177ce43c7ee9fdd2bc8e27 | |
parent | ac46a4dd58357b62d9b93253e47ade111efae649 (diff) |
Added ndpi_finalize_initalization() initialization function
-rw-r--r-- | example/ndpiReader.c | 2 | ||||
-rw-r--r-- | src/include/ndpi_api.h | 11 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 3 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 43 | ||||
-rw-r--r-- | src/lib/third_party/include/ahocorasick.h | 13 | ||||
-rw-r--r-- | src/lib/third_party/src/ahocorasick.c | 238 |
6 files changed, 167 insertions, 143 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index d52e4a3a9..3d9deea86 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -1911,6 +1911,8 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) { if(_customCategoryFilePath) ndpi_load_categories_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _customCategoryFilePath); + + ndpi_finalize_initalization(ndpi_thread_info[thread_id].workflow->ndpi_struct); } /* *********************************************** */ diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index 2552800ab..ed3e40d04 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -138,6 +138,9 @@ extern "C" { /** * Returns a new initialized detection module + * Note that before you can use it you can still load + * hosts and do other things. As soon as you are ready to use + * it do not forget to call first ndpi_finalize_initalization() * * @return the initialized detection module * @@ -145,6 +148,14 @@ extern "C" { struct ndpi_detection_module_struct *ndpi_init_detection_module(void); /** + * Completes the initialization (2nd step) + * + * @return the initialized detection module + * + */ + void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str); + + /** * Frees the memory allocated in the specified flow * * @par flow = the flow to deallocate diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index f9c49c4b5..0b1572249 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -1057,7 +1057,8 @@ struct ndpi_detection_module_struct { content_automa, /* Used for HTTP subprotocol_detection */ subprotocol_automa, /* Used for HTTP subprotocol_detection */ bigrams_automa, impossible_bigrams_automa; /* TOR */ - + /* IMPORTANT: please update ndpi_finalize_initalization() whenever you add a new automa */ + struct { #ifdef HAVE_HYPERSCAN struct hs *hostnames; diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 6fe1e8065..c5059bd06 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -2221,6 +2221,37 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(void) { /* *********************************************** */ +void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str) { + u_int i; + + for(i=0; i<4; i++) { + ndpi_automa *automa; + + switch(i) { + case 0: + automa = &ndpi_str->host_automa; + break; + + case 1: + automa = &ndpi_str->content_automa; + break; + + case 2: + automa = &ndpi_str->bigrams_automa; + break; + + case 3: + automa = &ndpi_str->impossible_bigrams_automa; + break; + } + + ac_automata_finalize((AC_AUTOMATA_t*)automa->ac_automa); + automa->ac_automa_finalized = 1; + } +} + +/* *********************************************** */ + /* Wrappers */ void* ndpi_init_automa(void) { return(ac_automata_init(ac_match_handler)); @@ -2261,7 +2292,6 @@ int ndpi_match_string(void *_automa, char *string_to_match) { ac_input_text.astring = string_to_match, ac_input_text.length = strlen(string_to_match); rc = ac_automata_search(automa, &ac_input_text, &match); - ac_automata_reset(automa); /* As ac_automata_search can detect partial matches and continue the search process @@ -2289,7 +2319,6 @@ int ndpi_match_string_id(void *_automa, char *string_to_match, u_int match_len, ac_input_text.astring = string_to_match, ac_input_text.length = match_len; rc = ac_automata_search(automa, &ac_input_text, &match); - ac_automata_reset(automa); /* As ac_automata_search can detect partial matches and continue the search process @@ -6072,13 +6101,12 @@ int ndpi_match_string_subprotocol(struct ndpi_detection_module_struct *ndpi_str, return(NDPI_PROTOCOL_UNKNOWN); if(!automa->ac_automa_finalized) { - ac_automata_finalize((AC_AUTOMATA_t*)automa->ac_automa); - automa->ac_automa_finalized = 1; + printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initalization()\n", __FILE__, __LINE__); + return(0); /* No matches */ } ac_input_text.astring = string_to_match, ac_input_text.length = string_to_match_len; ac_automata_search(((AC_AUTOMATA_t*)automa->ac_automa), &ac_input_text, &match); - ac_automata_reset(((AC_AUTOMATA_t*)automa->ac_automa)); /* We need to take into account also rc==0 that is used for partial matches */ ret_match->protocol_id = match.number, @@ -6256,13 +6284,12 @@ int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str, return(-1); if(!automa->ac_automa_finalized) { - ac_automata_finalize((AC_AUTOMATA_t*)automa->ac_automa); - automa->ac_automa_finalized = 1; + printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initalization()\n", __FILE__, __LINE__); + return(0); /* No matches */ } ac_input_text.astring = bigram_to_match, ac_input_text.length = 2; rc = ac_automata_search(((AC_AUTOMATA_t*)automa->ac_automa), &ac_input_text, &match); - ac_automata_reset(((AC_AUTOMATA_t*)automa->ac_automa)); /* As ac_automata_search can detect partial matches and continue the search process diff --git a/src/lib/third_party/include/ahocorasick.h b/src/lib/third_party/include/ahocorasick.h index 74812bef1..943be88eb 100644 --- a/src/lib/third_party/include/ahocorasick.h +++ b/src/lib/third_party/include/ahocorasick.h @@ -44,6 +44,12 @@ typedef struct * add pattern to automata anymore. */ unsigned short automata_open; + /* Statistic Variables */ + unsigned long total_patterns; /* Total patterns in the automata */ + +} AC_AUTOMATA_t; + +typedef struct { /* It is possible to feed a large input to the automata chunk by chunk to * be searched using ac_automata_search(). in fact by default automata * thinks that all chunks are related unless you do ac_automata_reset(). @@ -51,18 +57,13 @@ typedef struct AC_NODE_t * current_node; /* Pointer to current node while searching */ unsigned long base_position; /* Represents the position of current chunk related to whole input text */ - - /* Statistic Variables */ - unsigned long total_patterns; /* Total patterns in the automata */ - -} AC_AUTOMATA_t; +} AC_SEARCH_t; AC_AUTOMATA_t * ac_automata_init (MATCH_CALLBACK_f mc); AC_ERROR_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * str); void ac_automata_finalize (AC_AUTOMATA_t * thiz); int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * str, AC_REP_t * param); -void ac_automata_reset (AC_AUTOMATA_t * thiz); void ac_automata_release (AC_AUTOMATA_t * thiz, u_int8_t free_pattern); void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast); diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c index ffa8bac77..27134c0cc 100644 --- a/src/lib/third_party/src/ahocorasick.c +++ b/src/lib/third_party/src/ahocorasick.c @@ -3,7 +3,8 @@ * This file is part of multifast. * Copyright 2010-2012 Kamiar Kanani <kamiar.kanani@gmail.com> - + Copyright 2012-2019 ntop.org (Incremental improvements) + multifast is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -56,7 +57,6 @@ AC_AUTOMATA_t * ac_automata_init (MATCH_CALLBACK_f mc) thiz->all_nodes = (AC_NODE_t **) ndpi_malloc (thiz->all_nodes_max*sizeof(AC_NODE_t *)); thiz->match_callback = mc; ac_automata_register_nodeptr (thiz, thiz->root); - ac_automata_reset (thiz); thiz->total_patterns = 0; thiz->automata_open = 1; return thiz; @@ -88,21 +88,21 @@ AC_ERROR_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * patt) return ACERR_LONG_PATTERN; for (i=0; i<patt->length; i++) + { + alpha = patt->astring[i]; + if ((next = node_find_next(n, alpha))) { - alpha = patt->astring[i]; - if ((next = node_find_next(n, alpha))) - { - n = next; - continue; - } - else - { - next = node_create_next(n, alpha); - next->depth = n->depth + 1; - n = next; - ac_automata_register_nodeptr(thiz, n); - } + n = next; + continue; } + else + { + next = node_create_next(n, alpha); + next->depth = n->depth + 1; + n = next; + ac_automata_register_nodeptr(thiz, n); + } + } if(n->final) return ACERR_DUPLICATE_PATTERN; @@ -133,11 +133,11 @@ void ac_automata_finalize (AC_AUTOMATA_t * thiz) ac_automata_traverse_setfailure (thiz, thiz->root, alphas); for (i=0; i < thiz->all_nodes_num; i++) - { - node = thiz->all_nodes[i]; - ac_automata_union_matchstrs (node); - node_sort_edges (node); - } + { + node = thiz->all_nodes[i]; + ac_automata_union_matchstrs (node); + node_sort_edges (node); + } thiz->automata_open = 0; /* do not accept patterns any more */ ndpi_free(alphas); } @@ -159,70 +159,53 @@ void ac_automata_finalize (AC_AUTOMATA_t * thiz) * 0: success; continue searching; call-back sent me a 0 value * 1: success; stop searching; call-back sent me a non-0 value ******************************************************************************/ -int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * txt, AC_REP_t * param) -{ +int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * txt, AC_REP_t * param) { unsigned long position; AC_NODE_t *curr; AC_NODE_t *next; - + AC_SEARCH_t s; + if(thiz->automata_open) /* you must call ac_automata_locate_failure() first */ return -1; + /* Reset search */ + s.current_node = thiz->root; + s.base_position = 0; + position = 0; - curr = thiz->current_node; + curr = s.current_node; /* This is the main search loop. * it must be keep as lightweight as possible. */ - while (position < txt->length) - { - if(!(next = node_findbs_next(curr, txt->astring[position]))) - { - if(curr->failure_node /* we are not in the root node */) - curr = curr->failure_node; - else - position++; - } + while (position < txt->length) { + if(!(next = node_findbs_next(curr, txt->astring[position]))) { + if(curr->failure_node /* we are not in the root node */) + curr = curr->failure_node; else - { - curr = next; - position++; - } + position++; + } else { + curr = next; + position++; + } - if(curr->final && next) { - /* We check 'next' to find out if we came here after a alphabet - * transition or due to a fail. in second case we should not report - * matching because it was reported in previous node */ - thiz->match.position = position + thiz->base_position; - thiz->match.match_num = curr->matched_patterns_num; - thiz->match.patterns = curr->matched_patterns; - /* we found a match! do call-back */ - if (thiz->match_callback(&thiz->match, txt, param)) - return 1; - } + if(curr->final && next) { + /* We check 'next' to find out if we came here after a alphabet + * transition or due to a fail. in second case we should not report + * matching because it was reported in previous node */ + thiz->match.position = position + s.base_position; + thiz->match.match_num = curr->matched_patterns_num; + thiz->match.patterns = curr->matched_patterns; + /* we found a match! do call-back */ + if (thiz->match_callback(&thiz->match, txt, param)) + return 1; } + } - /* save status variables */ - thiz->current_node = curr; - thiz->base_position += position; return 0; } /****************************************************************************** - * FUNCTION: ac_automata_reset - * reset the automata and make it ready for doing new search on a new text. - * when you finished with the input text, you must reset automata state for - * new input, otherwise it will not work. - * PARAMS: - * AC_AUTOMATA_t * thiz: the pointer to the automata - ******************************************************************************/ -void ac_automata_reset (AC_AUTOMATA_t * thiz) -{ - thiz->current_node = thiz->root; - thiz->base_position = 0; -} - -/****************************************************************************** * FUNCTION: ac_automata_release * Release all allocated memories to the automata * PARAMS: @@ -235,10 +218,10 @@ void ac_automata_release (AC_AUTOMATA_t * thiz, u_int8_t free_pattern) AC_NODE_t * n; for (i=0; i < thiz->all_nodes_num; i++) - { - n = thiz->all_nodes[i]; - node_release(n, free_pattern); - } + { + n = thiz->all_nodes[i]; + node_release(n, free_pattern); + } ndpi_free(thiz->all_nodes); ndpi_free(thiz); } @@ -261,40 +244,40 @@ void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast) printf("---------------------------------\n"); for (i=0; i<thiz->all_nodes_num; i++) + { + n = thiz->all_nodes[i]; + printf("NODE(%3d)/----fail----> NODE(%3d)\n", + n->id, (n->failure_node)?n->failure_node->id:1); + for (j=0; j<n->outgoing_degree; j++) { - n = thiz->all_nodes[i]; - printf("NODE(%3d)/----fail----> NODE(%3d)\n", - n->id, (n->failure_node)?n->failure_node->id:1); - for (j=0; j<n->outgoing_degree; j++) + e = &n->outgoing[j]; + printf(" |----("); + if(isgraph(e->alpha)) + printf("%c)---", e->alpha); + else + printf("0x%x)", e->alpha); + printf("--> NODE(%3d)\n", e->next->id); + } + if (n->matched_patterns_num) { + printf("Accepted patterns: {"); + for (j=0; j<n->matched_patterns_num; j++) + { + sid = n->matched_patterns[j]; + if(j) printf(", "); + switch (repcast) { - e = &n->outgoing[j]; - printf(" |----("); - if(isgraph(e->alpha)) - printf("%c)---", e->alpha); - else - printf("0x%x)", e->alpha); - printf("--> NODE(%3d)\n", e->next->id); + case 'n': + printf("%u/%u/%u", + sid.rep.number, + sid.rep.category, + sid.rep.breed); + break; } - if (n->matched_patterns_num) { - printf("Accepted patterns: {"); - for (j=0; j<n->matched_patterns_num; j++) - { - sid = n->matched_patterns[j]; - if(j) printf(", "); - switch (repcast) - { - case 'n': - printf("%u/%u/%u", - sid.rep.number, - sid.rep.category, - sid.rep.breed); - break; - } - } - printf("}\n"); } - printf("---------------------------------\n"); + printf("}\n"); } + printf("---------------------------------\n"); + } } /****************************************************************************** @@ -304,13 +287,13 @@ void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast) static void ac_automata_register_nodeptr (AC_AUTOMATA_t * thiz, AC_NODE_t * node) { if(thiz->all_nodes_num >= thiz->all_nodes_max) - { - thiz->all_nodes = ndpi_realloc(thiz->all_nodes, - thiz->all_nodes_max*sizeof(AC_NODE_t *), - (REALLOC_CHUNK_ALLNODES+thiz->all_nodes_max)*sizeof(AC_NODE_t *) - ); - thiz->all_nodes_max += REALLOC_CHUNK_ALLNODES; - } + { + thiz->all_nodes = ndpi_realloc(thiz->all_nodes, + thiz->all_nodes_max*sizeof(AC_NODE_t *), + (REALLOC_CHUNK_ALLNODES+thiz->all_nodes_max)*sizeof(AC_NODE_t *) + ); + thiz->all_nodes_max += REALLOC_CHUNK_ALLNODES; + } thiz->all_nodes[thiz->all_nodes_num++] = node; } @@ -325,13 +308,13 @@ static void ac_automata_union_matchstrs (AC_NODE_t * node) AC_NODE_t * m = node; while ((m = m->failure_node)) - { - for (i=0; i < m->matched_patterns_num; i++) - node_register_matchstr(node, &(m->matched_patterns[i]), 1 /* this is an existing node */); + { + for (i=0; i < m->matched_patterns_num; i++) + node_register_matchstr(node, &(m->matched_patterns[i]), 1 /* this is an existing node */); - if (m->final) - node->final = 1; - } + if (m->final) + node->final = 1; + } // TODO : sort matched_patterns? is that necessary? I don't think so. } @@ -346,16 +329,16 @@ static void ac_automata_set_failure AC_NODE_t * m; for (i=1; i < node->depth; i++) + { + m = thiz->root; + for (j=i; j < node->depth && m; j++) + m = node_find_next (m, alphas[j]); + if (m) { - m = thiz->root; - for (j=i; j < node->depth && m; j++) - m = node_find_next (m, alphas[j]); - if (m) - { - node->failure_node = m; - break; - } + node->failure_node = m; + break; } + } if (!node->failure_node) node->failure_node = thiz->root; } @@ -373,15 +356,14 @@ static void ac_automata_traverse_setfailure unsigned int i; AC_NODE_t * next; - for (i=0; i < node->outgoing_degree; i++) - { - alphas[node->depth] = node->outgoing[i].alpha; - next = node->outgoing[i].next; + for (i=0; i < node->outgoing_degree; i++) { + alphas[node->depth] = node->outgoing[i].alpha; + next = node->outgoing[i].next; - /* At every node look for its failure node */ - ac_automata_set_failure (thiz, next, alphas); + /* At every node look for its failure node */ + ac_automata_set_failure (thiz, next, alphas); - /* Recursively call itself to traverse all nodes */ - ac_automata_traverse_setfailure (thiz, next, alphas); - } + /* Recursively call itself to traverse all nodes */ + ac_automata_traverse_setfailure (thiz, next, alphas); + } } |