aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca <deri@ntop.org>2019-11-07 19:28:16 +0000
committerLuca <deri@ntop.org>2019-11-07 19:28:16 +0000
commit0558d641f2230795ef856e5e1e5c77050becb932 (patch)
tree83be952091b9c14d73177ce43c7ee9fdd2bc8e27
parentac46a4dd58357b62d9b93253e47ade111efae649 (diff)
Added ndpi_finalize_initalization() initialization function
-rw-r--r--example/ndpiReader.c2
-rw-r--r--src/include/ndpi_api.h11
-rw-r--r--src/include/ndpi_typedefs.h3
-rw-r--r--src/lib/ndpi_main.c43
-rw-r--r--src/lib/third_party/include/ahocorasick.h13
-rw-r--r--src/lib/third_party/src/ahocorasick.c238
6 files changed, 167 insertions, 143 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index d52e4a3a9..3d9deea86 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -1911,6 +1911,8 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) {
if(_customCategoryFilePath)
ndpi_load_categories_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _customCategoryFilePath);
+
+ ndpi_finalize_initalization(ndpi_thread_info[thread_id].workflow->ndpi_struct);
}
/* *********************************************** */
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 2552800ab..ed3e40d04 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -138,6 +138,9 @@ extern "C" {
/**
* Returns a new initialized detection module
+ * Note that after this call you can still load hosts and perform
+ * other setup steps. Once setup is complete, you must call
+ * ndpi_finalize_initalization() before using the detection module
*
* @return the initialized detection module
*
@@ -145,6 +148,14 @@ extern "C" {
struct ndpi_detection_module_struct *ndpi_init_detection_module(void);
/**
+ * Completes the initialization (2nd step) by finalizing the automata
+ *
+ * @par ndpi_str = the detection module to finalize
+ *
+ */
+ void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str);
+
+ /**
* Frees the memory allocated in the specified flow
*
* @par flow = the flow to deallocate
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index f9c49c4b5..0b1572249 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -1057,7 +1057,8 @@ struct ndpi_detection_module_struct {
content_automa, /* Used for HTTP subprotocol_detection */
subprotocol_automa, /* Used for HTTP subprotocol_detection */
bigrams_automa, impossible_bigrams_automa; /* TOR */
-
+ /* IMPORTANT: please update ndpi_finalize_initalization() whenever you add a new automa */
+
struct {
#ifdef HAVE_HYPERSCAN
struct hs *hostnames;
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 6fe1e8065..c5059bd06 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -2221,6 +2221,37 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(void) {
/* *********************************************** */
+void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str) {
+ u_int i;
+
+ for(i=0; i<4; i++) {
+ ndpi_automa *automa;
+
+ switch(i) {
+ case 0:
+ automa = &ndpi_str->host_automa;
+ break;
+
+ case 1:
+ automa = &ndpi_str->content_automa;
+ break;
+
+ case 2:
+ automa = &ndpi_str->bigrams_automa;
+ break;
+
+ case 3:
+ automa = &ndpi_str->impossible_bigrams_automa;
+ break;
+ }
+
+ ac_automata_finalize((AC_AUTOMATA_t*)automa->ac_automa);
+ automa->ac_automa_finalized = 1;
+ }
+}
+
+/* *********************************************** */
+
/* Wrappers */
void* ndpi_init_automa(void) {
return(ac_automata_init(ac_match_handler));
@@ -2261,7 +2292,6 @@ int ndpi_match_string(void *_automa, char *string_to_match) {
ac_input_text.astring = string_to_match, ac_input_text.length = strlen(string_to_match);
rc = ac_automata_search(automa, &ac_input_text, &match);
- ac_automata_reset(automa);
/*
As ac_automata_search can detect partial matches and continue the search process
@@ -2289,7 +2319,6 @@ int ndpi_match_string_id(void *_automa, char *string_to_match, u_int match_len,
ac_input_text.astring = string_to_match, ac_input_text.length = match_len;
rc = ac_automata_search(automa, &ac_input_text, &match);
- ac_automata_reset(automa);
/*
As ac_automata_search can detect partial matches and continue the search process
@@ -6072,13 +6101,12 @@ int ndpi_match_string_subprotocol(struct ndpi_detection_module_struct *ndpi_str,
return(NDPI_PROTOCOL_UNKNOWN);
if(!automa->ac_automa_finalized) {
- ac_automata_finalize((AC_AUTOMATA_t*)automa->ac_automa);
- automa->ac_automa_finalized = 1;
+ printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initalization()\n", __FILE__, __LINE__);
+ return(0); /* No matches */
}
ac_input_text.astring = string_to_match, ac_input_text.length = string_to_match_len;
ac_automata_search(((AC_AUTOMATA_t*)automa->ac_automa), &ac_input_text, &match);
- ac_automata_reset(((AC_AUTOMATA_t*)automa->ac_automa));
/* We need to take into account also rc==0 that is used for partial matches */
ret_match->protocol_id = match.number,
@@ -6256,13 +6284,12 @@ int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str,
return(-1);
if(!automa->ac_automa_finalized) {
- ac_automata_finalize((AC_AUTOMATA_t*)automa->ac_automa);
- automa->ac_automa_finalized = 1;
+ printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initalization()\n", __FILE__, __LINE__);
+ return(0); /* No matches */
}
ac_input_text.astring = bigram_to_match, ac_input_text.length = 2;
rc = ac_automata_search(((AC_AUTOMATA_t*)automa->ac_automa), &ac_input_text, &match);
- ac_automata_reset(((AC_AUTOMATA_t*)automa->ac_automa));
/*
As ac_automata_search can detect partial matches and continue the search process
diff --git a/src/lib/third_party/include/ahocorasick.h b/src/lib/third_party/include/ahocorasick.h
index 74812bef1..943be88eb 100644
--- a/src/lib/third_party/include/ahocorasick.h
+++ b/src/lib/third_party/include/ahocorasick.h
@@ -44,6 +44,12 @@ typedef struct
* add pattern to automata anymore. */
unsigned short automata_open;
+ /* Statistic Variables */
+ unsigned long total_patterns; /* Total patterns in the automata */
+
+} AC_AUTOMATA_t;
+
+typedef struct {
/* It is possible to feed a large input to the automata chunk by chunk to
* be searched using ac_automata_search(). in fact by default automata
* thinks that all chunks are related unless you do ac_automata_reset().
@@ -51,18 +57,13 @@ typedef struct
AC_NODE_t * current_node; /* Pointer to current node while searching */
unsigned long base_position; /* Represents the position of current chunk
related to whole input text */
-
- /* Statistic Variables */
- unsigned long total_patterns; /* Total patterns in the automata */
-
-} AC_AUTOMATA_t;
+} AC_SEARCH_t;
AC_AUTOMATA_t * ac_automata_init (MATCH_CALLBACK_f mc);
AC_ERROR_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * str);
void ac_automata_finalize (AC_AUTOMATA_t * thiz);
int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * str, AC_REP_t * param);
-void ac_automata_reset (AC_AUTOMATA_t * thiz);
void ac_automata_release (AC_AUTOMATA_t * thiz, u_int8_t free_pattern);
void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast);
diff --git a/src/lib/third_party/src/ahocorasick.c b/src/lib/third_party/src/ahocorasick.c
index ffa8bac77..27134c0cc 100644
--- a/src/lib/third_party/src/ahocorasick.c
+++ b/src/lib/third_party/src/ahocorasick.c
@@ -3,7 +3,8 @@
* This file is part of multifast.
*
Copyright 2010-2012 Kamiar Kanani <kamiar.kanani@gmail.com>
-
+ Copyright 2012-2019 ntop.org (Incremental improvements)
+
multifast is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -56,7 +57,6 @@ AC_AUTOMATA_t * ac_automata_init (MATCH_CALLBACK_f mc)
thiz->all_nodes = (AC_NODE_t **) ndpi_malloc (thiz->all_nodes_max*sizeof(AC_NODE_t *));
thiz->match_callback = mc;
ac_automata_register_nodeptr (thiz, thiz->root);
- ac_automata_reset (thiz);
thiz->total_patterns = 0;
thiz->automata_open = 1;
return thiz;
@@ -88,21 +88,21 @@ AC_ERROR_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * patt)
return ACERR_LONG_PATTERN;
for (i=0; i<patt->length; i++)
+ {
+ alpha = patt->astring[i];
+ if ((next = node_find_next(n, alpha)))
{
- alpha = patt->astring[i];
- if ((next = node_find_next(n, alpha)))
- {
- n = next;
- continue;
- }
- else
- {
- next = node_create_next(n, alpha);
- next->depth = n->depth + 1;
- n = next;
- ac_automata_register_nodeptr(thiz, n);
- }
+ n = next;
+ continue;
}
+ else
+ {
+ next = node_create_next(n, alpha);
+ next->depth = n->depth + 1;
+ n = next;
+ ac_automata_register_nodeptr(thiz, n);
+ }
+ }
if(n->final)
return ACERR_DUPLICATE_PATTERN;
@@ -133,11 +133,11 @@ void ac_automata_finalize (AC_AUTOMATA_t * thiz)
ac_automata_traverse_setfailure (thiz, thiz->root, alphas);
for (i=0; i < thiz->all_nodes_num; i++)
- {
- node = thiz->all_nodes[i];
- ac_automata_union_matchstrs (node);
- node_sort_edges (node);
- }
+ {
+ node = thiz->all_nodes[i];
+ ac_automata_union_matchstrs (node);
+ node_sort_edges (node);
+ }
thiz->automata_open = 0; /* do not accept patterns any more */
ndpi_free(alphas);
}
@@ -159,70 +159,53 @@ void ac_automata_finalize (AC_AUTOMATA_t * thiz)
* 0: success; continue searching; call-back sent me a 0 value
* 1: success; stop searching; call-back sent me a non-0 value
******************************************************************************/
-int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * txt, AC_REP_t * param)
-{
+int ac_automata_search (AC_AUTOMATA_t * thiz, AC_TEXT_t * txt, AC_REP_t * param) {
unsigned long position;
AC_NODE_t *curr;
AC_NODE_t *next;
-
+ AC_SEARCH_t s;
+
if(thiz->automata_open)
/* you must call ac_automata_finalize() first */
return -1;
+ /* Reset search */
+ s.current_node = thiz->root;
+ s.base_position = 0;
+
position = 0;
- curr = thiz->current_node;
+ curr = s.current_node;
/* This is the main search loop.
* it must be keep as lightweight as possible. */
- while (position < txt->length)
- {
- if(!(next = node_findbs_next(curr, txt->astring[position])))
- {
- if(curr->failure_node /* we are not in the root node */)
- curr = curr->failure_node;
- else
- position++;
- }
+ while (position < txt->length) {
+ if(!(next = node_findbs_next(curr, txt->astring[position]))) {
+ if(curr->failure_node /* we are not in the root node */)
+ curr = curr->failure_node;
else
- {
- curr = next;
- position++;
- }
+ position++;
+ } else {
+ curr = next;
+ position++;
+ }
- if(curr->final && next) {
- /* We check 'next' to find out if we came here after a alphabet
- * transition or due to a fail. in second case we should not report
- * matching because it was reported in previous node */
- thiz->match.position = position + thiz->base_position;
- thiz->match.match_num = curr->matched_patterns_num;
- thiz->match.patterns = curr->matched_patterns;
- /* we found a match! do call-back */
- if (thiz->match_callback(&thiz->match, txt, param))
- return 1;
- }
+ if(curr->final && next) {
+ /* We check 'next' to find out if we came here after an alphabet
+ * transition or due to a failure. In the second case we should not report
+ * the match because it was already reported in the previous node */
+ thiz->match.position = position + s.base_position;
+ thiz->match.match_num = curr->matched_patterns_num;
+ thiz->match.patterns = curr->matched_patterns;
+ /* we found a match! do call-back */
+ if (thiz->match_callback(&thiz->match, txt, param))
+ return 1;
}
+ }
- /* save status variables */
- thiz->current_node = curr;
- thiz->base_position += position;
return 0;
}
/******************************************************************************
- * FUNCTION: ac_automata_reset
- * reset the automata and make it ready for doing new search on a new text.
- * when you finished with the input text, you must reset automata state for
- * new input, otherwise it will not work.
- * PARAMS:
- * AC_AUTOMATA_t * thiz: the pointer to the automata
- ******************************************************************************/
-void ac_automata_reset (AC_AUTOMATA_t * thiz)
-{
- thiz->current_node = thiz->root;
- thiz->base_position = 0;
-}
-
-/******************************************************************************
* FUNCTION: ac_automata_release
* Release all allocated memories to the automata
* PARAMS:
@@ -235,10 +218,10 @@ void ac_automata_release (AC_AUTOMATA_t * thiz, u_int8_t free_pattern)
AC_NODE_t * n;
for (i=0; i < thiz->all_nodes_num; i++)
- {
- n = thiz->all_nodes[i];
- node_release(n, free_pattern);
- }
+ {
+ n = thiz->all_nodes[i];
+ node_release(n, free_pattern);
+ }
ndpi_free(thiz->all_nodes);
ndpi_free(thiz);
}
@@ -261,40 +244,40 @@ void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast)
printf("---------------------------------\n");
for (i=0; i<thiz->all_nodes_num; i++)
+ {
+ n = thiz->all_nodes[i];
+ printf("NODE(%3d)/----fail----> NODE(%3d)\n",
+ n->id, (n->failure_node)?n->failure_node->id:1);
+ for (j=0; j<n->outgoing_degree; j++)
{
- n = thiz->all_nodes[i];
- printf("NODE(%3d)/----fail----> NODE(%3d)\n",
- n->id, (n->failure_node)?n->failure_node->id:1);
- for (j=0; j<n->outgoing_degree; j++)
+ e = &n->outgoing[j];
+ printf(" |----(");
+ if(isgraph(e->alpha))
+ printf("%c)---", e->alpha);
+ else
+ printf("0x%x)", e->alpha);
+ printf("--> NODE(%3d)\n", e->next->id);
+ }
+ if (n->matched_patterns_num) {
+ printf("Accepted patterns: {");
+ for (j=0; j<n->matched_patterns_num; j++)
+ {
+ sid = n->matched_patterns[j];
+ if(j) printf(", ");
+ switch (repcast)
{
- e = &n->outgoing[j];
- printf(" |----(");
- if(isgraph(e->alpha))
- printf("%c)---", e->alpha);
- else
- printf("0x%x)", e->alpha);
- printf("--> NODE(%3d)\n", e->next->id);
+ case 'n':
+ printf("%u/%u/%u",
+ sid.rep.number,
+ sid.rep.category,
+ sid.rep.breed);
+ break;
}
- if (n->matched_patterns_num) {
- printf("Accepted patterns: {");
- for (j=0; j<n->matched_patterns_num; j++)
- {
- sid = n->matched_patterns[j];
- if(j) printf(", ");
- switch (repcast)
- {
- case 'n':
- printf("%u/%u/%u",
- sid.rep.number,
- sid.rep.category,
- sid.rep.breed);
- break;
- }
- }
- printf("}\n");
}
- printf("---------------------------------\n");
+ printf("}\n");
}
+ printf("---------------------------------\n");
+ }
}
/******************************************************************************
@@ -304,13 +287,13 @@ void ac_automata_display (AC_AUTOMATA_t * thiz, char repcast)
static void ac_automata_register_nodeptr (AC_AUTOMATA_t * thiz, AC_NODE_t * node)
{
if(thiz->all_nodes_num >= thiz->all_nodes_max)
- {
- thiz->all_nodes = ndpi_realloc(thiz->all_nodes,
- thiz->all_nodes_max*sizeof(AC_NODE_t *),
- (REALLOC_CHUNK_ALLNODES+thiz->all_nodes_max)*sizeof(AC_NODE_t *)
- );
- thiz->all_nodes_max += REALLOC_CHUNK_ALLNODES;
- }
+ {
+ thiz->all_nodes = ndpi_realloc(thiz->all_nodes,
+ thiz->all_nodes_max*sizeof(AC_NODE_t *),
+ (REALLOC_CHUNK_ALLNODES+thiz->all_nodes_max)*sizeof(AC_NODE_t *)
+ );
+ thiz->all_nodes_max += REALLOC_CHUNK_ALLNODES;
+ }
thiz->all_nodes[thiz->all_nodes_num++] = node;
}
@@ -325,13 +308,13 @@ static void ac_automata_union_matchstrs (AC_NODE_t * node)
AC_NODE_t * m = node;
while ((m = m->failure_node))
- {
- for (i=0; i < m->matched_patterns_num; i++)
- node_register_matchstr(node, &(m->matched_patterns[i]), 1 /* this is an existing node */);
+ {
+ for (i=0; i < m->matched_patterns_num; i++)
+ node_register_matchstr(node, &(m->matched_patterns[i]), 1 /* this is an existing node */);
- if (m->final)
- node->final = 1;
- }
+ if (m->final)
+ node->final = 1;
+ }
// TODO : sort matched_patterns? is that necessary? I don't think so.
}
@@ -346,16 +329,16 @@ static void ac_automata_set_failure
AC_NODE_t * m;
for (i=1; i < node->depth; i++)
+ {
+ m = thiz->root;
+ for (j=i; j < node->depth && m; j++)
+ m = node_find_next (m, alphas[j]);
+ if (m)
{
- m = thiz->root;
- for (j=i; j < node->depth && m; j++)
- m = node_find_next (m, alphas[j]);
- if (m)
- {
- node->failure_node = m;
- break;
- }
+ node->failure_node = m;
+ break;
}
+ }
if (!node->failure_node)
node->failure_node = thiz->root;
}
@@ -373,15 +356,14 @@ static void ac_automata_traverse_setfailure
unsigned int i;
AC_NODE_t * next;
- for (i=0; i < node->outgoing_degree; i++)
- {
- alphas[node->depth] = node->outgoing[i].alpha;
- next = node->outgoing[i].next;
+ for (i=0; i < node->outgoing_degree; i++) {
+ alphas[node->depth] = node->outgoing[i].alpha;
+ next = node->outgoing[i].next;
- /* At every node look for its failure node */
- ac_automata_set_failure (thiz, next, alphas);
+ /* At every node look for its failure node */
+ ac_automata_set_failure (thiz, next, alphas);
- /* Recursively call itself to traverse all nodes */
- ac_automata_traverse_setfailure (thiz, next, alphas);
- }
+ /* Recursively call itself to traverse all nodes */
+ ac_automata_traverse_setfailure (thiz, next, alphas);
+ }
}