aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/include/ndpi_api.h 3
-rw-r--r-- src/lib/ndpi_content_match.c.inc 7
-rw-r--r-- src/lib/ndpi_main.c 3
-rw-r--r-- src/lib/ndpi_utils.c 85
4 files changed, 91 insertions, 7 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index a0a3c92af..06444ed39 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -811,7 +811,8 @@ extern "C" {
const char* ndpi_cipher2str(u_int32_t cipher);
u_int16_t ndpi_guess_host_protocol_id(struct ndpi_detection_module_struct *ndpi_struct,
struct ndpi_flow_struct *flow);
-
+ int ndpi_has_human_readeable_string(struct ndpi_detection_module_struct *ndpi_struct,
+ char *buffer, u_int buffer_size);
#ifdef __cplusplus
}
#endif
diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc
index 7ed3509a4..d12560bb1 100644
--- a/src/lib/ndpi_content_match.c.inc
+++ b/src/lib/ndpi_content_match.c.inc
@@ -8700,8 +8700,7 @@ ndpi_protocol_match host_match[] = {
{ NULL, NULL, NULL, 0 }
};
-
-/* ****************************************************** */
+/* ******************************************************************** */
/*
Tor
@@ -8763,6 +8762,8 @@ static const char *ndpi_en_bigrams[] = {
"py", "ry", "sy", "ty", "uy", "vy", "wy", "xy", "yy", "zy", "az", "bz", "cz", "dz", "ez", "gz", "iz",
"lz", "nz", "oz", "pz", "rz", "tz", "uz", "zz", NULL };
+/* ******************************************************************** */
+
static const char *ndpi_en_impossible_bigrams[] = {
"bk", "bq", "bx", "cb", "cf", "cg", "cj", "cp", "cv", "cw", "cx", "dx", "fk", "fq", "fv", "fx", /* "ee", removed it can be found in 'meeting' */
"fz", "gq", "gv", "gx", "hh", "hk", "hv", "hx", "hz", "iy", "jb", /* "jc", jcrew.com */ "jd", "jf", "jg", "jh", "jk",
@@ -8775,3 +8776,5 @@ static const char *ndpi_en_impossible_bigrams[] = {
"yj", "yq", "yv", "yz", "yw", "zb", "zc", "zg", "zh", "zj", "zn", "zq", "zs", "zx", "wh", "wk",
"wb", "zk", "kp", "zk", "xy",
NULL };
+
+
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 4112bc657..3e1c445fe 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -650,8 +650,7 @@ static void destroy_hyperscan(struct ndpi_detection_module_struct *ndpi_mod) {
/* ******************************************************************** */
-static void init_string_based_protocols(struct ndpi_detection_module_struct *ndpi_mod)
-{
+static void init_string_based_protocols(struct ndpi_detection_module_struct *ndpi_mod) {
int i;
#ifdef HAVE_HYPERSCAN
diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c
index f48a19393..480f4ff18 100644
--- a/src/lib/ndpi_utils.c
+++ b/src/lib/ndpi_utils.c
@@ -66,7 +66,7 @@ int ndpi_check_punycode_string(char * buffer , int len) {
// is a punycode string
return(1);
}
-
+
// not a punycode string
return 0;
}
@@ -276,7 +276,7 @@ int strcasecmp(const char *s1, const char *s2) {
register const u_char *cm = charmap,
*us1 = (const u_char *)s1,
*us2 = (const u_char *)s2;
-
+
while (cm[*us1] == cm[*us2++])
if(*us1++ == '\0')
return (0);
@@ -601,3 +601,84 @@ const char* ndpi_cipher2str(u_int32_t cipher) {
}
}
+/* ******************************************************************** */
+
+/* Returns 1 when c is one of the non-alphanumeric characters that are
+   still accepted inside a readable string ('.' or '@'), 0 otherwise. */
+static int ndpi_is_other_char(char c) {
+  switch(c) {
+  case '.':
+  case '@':
+    return(1);
+
+  default:
+    return(0);
+  }
+}
+
+/* ******************************************************************** */
+
+/* Returns non-zero when c is a decimal digit, a letter, or one of the
+   extra characters accepted by ndpi_is_other_char(); 0 otherwise. */
+static int ndpi_is_valid_char(char c) {
+  /* The <ctype.h> classifiers are only defined for values representable
+     as unsigned char (or EOF). Plain char may be signed, so a payload
+     byte >= 0x80 would be passed as a negative int: convert first. */
+  const unsigned char uc = (unsigned char)c;
+
+  return(isdigit(uc)
+         || isalpha(uc)
+         || ndpi_is_other_char(c));
+}
+
+/* ******************************************************************** */
+
+
+/* Decides whether the two characters str[0] and str[1] form a pair that
+   the readable-string scan should accept.
+
+   Returns 1 immediately when both characters are digits, or when either
+   one is '.' or '@' (see ndpi_is_other_char()). Otherwise returns the
+   result of ndpi_match_bigram() for the lower-cased pair looked up in
+   ndpi_struct->bigrams_automa.
+
+   str must point to at least two readable bytes. */
+static int ndpi_find_non_eng_bigrams(struct ndpi_detection_module_struct *ndpi_struct,
+                                     char *str) {
+  /* Convert once: <ctype.h> functions are undefined for negative char values */
+  const unsigned char c0 = (unsigned char)str[0];
+  const unsigned char c1 = (unsigned char)str[1];
+  char s[3];
+
+  if((isdigit(c0) && isdigit(c1))
+     || ndpi_is_other_char(str[0])
+     || ndpi_is_other_char(str[1]))
+    return(1);
+
+  /* Build the NUL-terminated, lower-cased key used for the automa lookup */
+  s[0] = (char)tolower(c0), s[1] = (char)tolower(c1), s[2] = '\0';
+
+  return(ndpi_match_bigram(ndpi_struct, &ndpi_struct->bigrams_automa, s));
+}
+
+/* ******************************************************************** */
+
+/* #define PRINT_STRINGS 1 */
+
+/* Scans buffer for runs of characters that look like readable text.
+
+   A run is a sequence of consecutive accepted character pairs (both
+   characters pass ndpi_is_valid_char() and the pair passes
+   ndpi_find_non_eng_bigrams()), optionally followed by one more valid
+   character. The function returns 1 if at least one run longer than
+   NDPI_MIN_VALID_STRING_LEN characters is terminated inside the scanned
+   region, 0 otherwise.
+
+   Pairs are examined for i in [0, buffer_size-2), so the last byte of
+   the buffer is never read.
+
+   NOTE(review): a run that is still open when the loop ends is not
+   evaluated, so a readable string reaching the end of the scanned
+   region is not reported - confirm whether this is intended. */
+int ndpi_has_human_readeable_string(struct ndpi_detection_module_struct *ndpi_struct,
+                                    char *buffer, u_int buffer_size) {
+  u_int ret = 0, i, do_cr = 0, len = 0;
+  const u_int8_t NDPI_MIN_VALID_STRING_LEN = 4; /* Will return 0 if no string > NDPI_MIN_VALID_STRING_LEN has been found */
+
+  /* buffer_size is unsigned: with fewer than 3 bytes "buffer_size-2"
+     would either wrap around (size 1, causing an out-of-bounds scan) or
+     yield an empty loop (size 2). In both cases nothing can match. */
+  if((buffer == NULL) || (buffer_size < 3))
+    return(0);
+
+  for(i=0; i<buffer_size-2; i++) {
+    if(ndpi_is_valid_char(buffer[i])
+       && ndpi_is_valid_char(buffer[i+1])
+       && ndpi_find_non_eng_bigrams(ndpi_struct, &buffer[i])) {
+#ifdef PRINT_STRINGS
+      printf("%c%c", buffer[i], buffer[i+1]);
+#endif
+      /* Accepted pair: consume both characters (the loop adds the other +1) */
+      do_cr = 1, i += 1, len += 2;
+    } else {
+      /* A single valid character right after a run still belongs to it */
+      if(ndpi_is_valid_char(buffer[i]) && do_cr) {
+#ifdef PRINT_STRINGS
+        printf("%c", buffer[i]);
+#endif
+        len += 1;
+      }
+
+      /* The run (if any) ends here: evaluate its length and reset */
+      if(do_cr) {
+#ifdef PRINT_STRINGS
+        printf(" [len: %u]\n", len);
+#endif
+        if(len > NDPI_MIN_VALID_STRING_LEN)
+          ret = 1;
+
+        do_cr = 0, len = 0;
+      }
+    }
+  }
+
+#ifdef PRINT_STRINGS
+  printf("=======>> Found string: %u\n", ret);
+#endif
+
+  return(ret);
+}