aboutsummaryrefslogtreecommitdiff
path: root/src/lib/ndpi_utils.c
diff options
context:
space:
mode:
author: Vitaly Lavrov <vel21ripn@gmail.com> 2021-07-12 15:39:43 +0000
committer: GitHub <noreply@github.com> 2021-07-12 17:39:43 +0200
commit: c418b7110b9385c5c3748c10e198df27ae0f7083 (patch)
tree: 046941f8085b48bf27b03cd60bfaee180906af21 /src/lib/ndpi_utils.c
parent: 78b1295dc18e297c1da53006bde1e0870e278db9 (diff)
ahocorasick. Code review. Part 2. (#1236)
Simplified the process of adding lines to AC_AUTOMATA_t. Use the ndpi_string_to_automa() function to add patterns with domain names. For other cases, you can use ndpi_add_string_value_to_automa(). ac_automata_feature(ac_automa, AC_FEATURE_LC) allows adding and comparing data in a case-insensitive manner. For mandatory pattern comparison from the end of the line, the "ac_pattern.rep.at_end=1" flag is used. This eliminated unnecessary conversions to lowercase and adding "$" for end-of-line matching in domain name patterns. ac_match_handler() has been renamed to ac_domain_match_handler() and has been greatly simplified. ac_domain_match_handler() looks for the template with the highest domain level. For special cases it is possible to manually specify the domain level. Added a test for checking ambiguous domain names like: - short.weixin.qq.com is QQ, not Wechat - instagram.faae1-1.fna.fbcdn.net is Instagram, not Facebook. If you specify a NULL handler when creating the AC_AUTOMATA_t structure, then the pattern with the maximum length that satisfies the search conditions will be found (exact match, from the beginning of the string, from the end of the string, or a substring). Added debugging for ac_automata_search. To do this, you need to enable debugging globally using ac_automata_enable_debug(1) and enable debugging in the AC_AUTOMATA_t structure using ac_automata_name("name", AC_FEATURE_DEBUG). The search will display "name" and a list of matching patterns. Running "AHO_DEBUG=1 ndpiReader ..." will show the lines that were searched for templates and which templates were found. The ac_automata_dump() prototype has been changed. Now it outputs data to a file. If it is specified as NULL, then the output will be directed to stdout. If you need to get data as a string, then use open_memstream(). Added the ability to run individual tests via the do.sh script.
Diffstat (limited to 'src/lib/ndpi_utils.c')
-rw-r--r-- src/lib/ndpi_utils.c 16
1 files changed, 14 insertions, 2 deletions
diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c
index 9814733f7..95f0a4345 100644
--- a/src/lib/ndpi_utils.c
+++ b/src/lib/ndpi_utils.c
@@ -731,7 +731,7 @@ const char* ndpi_cipher2str(u_int32_t cipher) {
/* ******************************************************************** */
-static int ndpi_is_other_char(char c) {
+static inline int ndpi_is_other_char(char c) {
return((c == '.')
|| (c == ' ')
|| (c == '@')
@@ -741,7 +741,7 @@ static int ndpi_is_other_char(char c) {
/* ******************************************************************** */
-static int ndpi_is_valid_char(char c) {
+static int _ndpi_is_valid_char(char c) {
if(ispunct(c) && (!ndpi_is_other_char(c)))
return(0);
else
@@ -749,6 +749,18 @@ static int ndpi_is_valid_char(char c) {
|| isalpha(c)
|| ndpi_is_other_char(c));
}
+static char ndpi_is_valid_char_tbl[256],ndpi_is_valid_char_tbl_init=0;
+
+static void _ndpi_is_valid_char_init(void) {
+ int c;
+ for(c=0; c < 256; c++) ndpi_is_valid_char_tbl[c] = _ndpi_is_valid_char(c);
+ ndpi_is_valid_char_tbl_init = 1;
+}
+static inline int ndpi_is_valid_char(char c) {
+ if(!ndpi_is_valid_char_tbl_init)
+ _ndpi_is_valid_char_init();
+ return ndpi_is_valid_char_tbl[(unsigned char)c];
+}
/* ******************************************************************** */