diff options
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/ndpi_content_match.c.inc | 7 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 3 | ||||
-rw-r--r-- | src/lib/ndpi_utils.c | 85 |
3 files changed, 89 insertions, 6 deletions
diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc index 7ed3509a4..d12560bb1 100644 --- a/src/lib/ndpi_content_match.c.inc +++ b/src/lib/ndpi_content_match.c.inc @@ -8700,8 +8700,7 @@ ndpi_protocol_match host_match[] = { { NULL, NULL, NULL, 0 } }; - -/* ****************************************************** */ +/* ******************************************************************** */ /* Tor @@ -8763,6 +8762,8 @@ static const char *ndpi_en_bigrams[] = { "py", "ry", "sy", "ty", "uy", "vy", "wy", "xy", "yy", "zy", "az", "bz", "cz", "dz", "ez", "gz", "iz", "lz", "nz", "oz", "pz", "rz", "tz", "uz", "zz", NULL }; +/* ******************************************************************** */ + static const char *ndpi_en_impossible_bigrams[] = { "bk", "bq", "bx", "cb", "cf", "cg", "cj", "cp", "cv", "cw", "cx", "dx", "fk", "fq", "fv", "fx", /* "ee", removed it can be found in 'meeting' */ "fz", "gq", "gv", "gx", "hh", "hk", "hv", "hx", "hz", "iy", "jb", /* "jc", jcrew.com */ "jd", "jf", "jg", "jh", "jk", @@ -8775,3 +8776,5 @@ static const char *ndpi_en_impossible_bigrams[] = { "yj", "yq", "yv", "yz", "yw", "zb", "zc", "zg", "zh", "zj", "zn", "zq", "zs", "zx", "wh", "wk", "wb", "zk", "kp", "zk", "xy", NULL }; + + diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 4112bc657..3e1c445fe 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -650,8 +650,7 @@ static void destroy_hyperscan(struct ndpi_detection_module_struct *ndpi_mod) { /* ******************************************************************** */ -static void init_string_based_protocols(struct ndpi_detection_module_struct *ndpi_mod) -{ +static void init_string_based_protocols(struct ndpi_detection_module_struct *ndpi_mod) { int i; #ifdef HAVE_HYPERSCAN diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c index f48a19393..480f4ff18 100644 --- a/src/lib/ndpi_utils.c +++ b/src/lib/ndpi_utils.c @@ -66,7 +66,7 @@ int ndpi_check_punycode_string(char * buffer , int len) { // is a punycode string return(1); } - + // not a punycode string return 0; } @@ -276,7 +276,7 @@ int strcasecmp(const char *s1, const char *s2) { register const u_char *cm = charmap, *us1 = (const u_char *)s1, *us2 = (const u_char *)s2; - + while (cm[*us1] == cm[*us2++]) if(*us1++ == '\0') return (0); @@ -601,3 +601,84 @@ const char* ndpi_cipher2str(u_int32_t cipher) { } } +/* ******************************************************************** */ + +static int ndpi_is_other_char(char c) { + return((c == '.') + || (c == '@') + ); +} + +/* ******************************************************************** */ + +static int ndpi_is_valid_char(char c) { + return(isdigit(c) + || isalpha(c) + || ndpi_is_other_char(c)); +} + +/* ******************************************************************** */ + + +static int ndpi_find_non_eng_bigrams(struct ndpi_detection_module_struct *ndpi_struct, + char *str) { + char s[3]; + + if((isdigit(str[0]) && isdigit(str[1])) + || (ndpi_is_other_char(str[0]) || ndpi_is_other_char(str[1])) + || (ndpi_is_other_char(str[0]) || ndpi_is_other_char(str[1])) + ) + return(1); + + s[0] = tolower(str[0]), s[1] = tolower(str[1]), s[2] = '\0'; + + return(ndpi_match_bigram(ndpi_struct, &ndpi_struct->bigrams_automa, s)); +} + +/* ******************************************************************** */ + +/* #define PRINT_STRINGS 1 */ + +int ndpi_has_human_readeable_string(struct ndpi_detection_module_struct *ndpi_struct, + char *buffer, u_int buffer_size) { + u_int ret = 0, i = 0, do_cr = 0, len = 0; + const u_int8_t NDPI_MIN_VALID_STRING_LEN = 4; /* Will return 0 if no string > NDPI_MIN_VALID_STRING_LEN have been found */ + + if(buffer_size <= 0) + return(0); + + for(i=0; i<buffer_size-2; i++) { + if(ndpi_is_valid_char(buffer[i]) + && ndpi_is_valid_char(buffer[i+1]) + && ndpi_find_non_eng_bigrams(ndpi_struct, &buffer[i])) { +#ifdef PRINT_STRINGS + printf("%c%c", buffer[i], buffer[i+1]); +#endif + do_cr = 1, i += 1, len += 2; + } else { + if(ndpi_is_valid_char(buffer[i]) && do_cr) { +#ifdef PRINT_STRINGS + printf("%c", buffer[i]); +#endif + len += 1; + } + + // printf("->> %c%c\n", isprint(buffer[i]) ? buffer[i] : '.', isprint(buffer[i+1]) ? buffer[i+1] : '.'); + if(do_cr) { +#ifdef PRINT_STRINGS + printf(" [len: %u]\n", len); +#endif + if(len > NDPI_MIN_VALID_STRING_LEN) + ret = 1; + + do_cr = 0, len = 0; + } + } + } + +#ifdef PRINT_STRINGS + printf("=======>> Found string: %u\n", ret); +#endif + + return(ret); +} |