aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorVitaly Lavrov <vel21ripn@gmail.com>2021-06-23 10:03:52 +0000
committerGitHub <noreply@github.com>2021-06-23 12:03:52 +0200
commit35fc6a6de5ab1be3cc4c3c1eb9428b2312a503d0 (patch)
tree513b073307c4bd44bb2e211be2eaf82c730544be /src
parentf19937c8c9fd4e1c21988f523d9e78b954f6fcc8 (diff)
Speed and memory size optimisation (#1214)
Removed bigram_automata, impossible_bigram_automata, trigram_automata. The ahocorasick structure is replaced with a bitmap. The bitmap size for ndpi_en_bigram is 176 bytes. The bitmap size for ndpi_en_trigram is 2201 bytes. On the test machine, the test execution time was reduced from 27.3 seconds to 24.7 (9%).
Diffstat (limited to 'src')
-rw-r--r--src/include/ndpi_api.h.in4
-rw-r--r--src/include/ndpi_typedefs.h1
-rw-r--r--src/lib/ndpi_main.c133
-rw-r--r--src/lib/ndpi_utils.c2
4 files changed, 56 insertions, 84 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in
index 87e03f7e9..f62b4aa3c 100644
--- a/src/include/ndpi_api.h.in
+++ b/src/include/ndpi_api.h.in
@@ -499,9 +499,7 @@ extern "C" {
* @return 0
*
*/
- int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_mod,
- ndpi_automa *automa,
- char *bigram_to_match);
+ int ndpi_match_bigram(const char *bigram_to_match);
/**
* Write the protocol name in the buffer -buf- as master_protocol.protocol
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 173f3baff..375739573 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -1153,7 +1153,6 @@ struct ndpi_detection_module_struct {
ndpi_automa host_automa, /* Used for DNS/HTTPS */
content_automa, /* Used for HTTP subprotocol_detection */
subprotocol_automa, /* Used for HTTP subprotocol_detection */
- bigrams_automa, trigrams_automa, impossible_bigrams_automa, /* TOR */
risky_domain_automa, tls_cert_subject_automa,
malicious_ja3_automa, malicious_sha1_automa;
/* IMPORTANT: please update ndpi_finalize_initialization() whenever you add a new automa */
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index ff4b0cb5d..00ebb1cc6 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -692,6 +692,28 @@ void ndpi_self_check_host_match() {
/* ******************************************************************** */
+#define XGRAMS_C 26
+static int ndpi_xgrams_inited = 0;
+static unsigned int bigrams_bitmap[(XGRAMS_C*XGRAMS_C+31)/32];
+static unsigned int imposible_bigrams_bitmap[(XGRAMS_C*XGRAMS_C+31)/32];
+static unsigned int trigrams_bitmap[(XGRAMS_C*XGRAMS_C*XGRAMS_C+31)/32];
+
+
+static void ndpi_xgrams_init(unsigned int *dst,size_t dn, const char **src,size_t sn,int l) {
+unsigned int i,j,c;
+for(i=0;i < sn && src[i]; i++) {
+ for(j=0,c=0; j < l; j++) {
+ unsigned char a = (unsigned char)src[i][j];
+ if(a < 'a' || a > 'z') { printf("%u: c%u %c\n",i,j,a); abort(); }
+ c *= XGRAMS_C;
+ c += a - 'a';
+ }
+ if(src[i][l]) { printf("%u: c[%d] != 0\n",i,l); abort(); }
+ if((c >> 3) >= dn) abort();
+ dst[c >> 5] |= 1u << (c & 0x1f);
+}
+}
+
static void init_string_based_protocols(struct ndpi_detection_module_struct *ndpi_str) {
int i;
@@ -722,16 +744,16 @@ static void init_string_based_protocols(struct ndpi_detection_module_struct *ndp
#ifdef MATCH_DEBUG
// ac_automata_display(ndpi_str->host_automa.ac_automa, 'n');
#endif
+ if(!ndpi_xgrams_inited) {
+ ndpi_xgrams_inited = 1;
+ ndpi_xgrams_init(bigrams_bitmap,sizeof(bigrams_bitmap),
+ ndpi_en_bigrams,sizeof(ndpi_en_bigrams)/sizeof(ndpi_en_bigrams[0]), 2);
- for(i = 0; ndpi_en_bigrams[i] != NULL; i++)
- ndpi_string_to_automa(ndpi_str, &ndpi_str->bigrams_automa, (char *) ndpi_en_bigrams[i], 1, 1, 1, 0, 0);
-
- for(i = 0; ndpi_en_trigrams[i] != NULL; i++)
- ndpi_string_to_automa(ndpi_str, &ndpi_str->trigrams_automa, (char *) ndpi_en_trigrams[i], 1, 1, 1, 0, 0);
-
- for(i = 0; ndpi_en_impossible_bigrams[i] != NULL; i++)
- ndpi_string_to_automa(ndpi_str, &ndpi_str->impossible_bigrams_automa, (char *) ndpi_en_impossible_bigrams[i], 1,
- 1, 1, 0, 0);
+ ndpi_xgrams_init(imposible_bigrams_bitmap,sizeof(imposible_bigrams_bitmap),
+ ndpi_en_impossible_bigrams,sizeof(ndpi_en_impossible_bigrams)/sizeof(ndpi_en_impossible_bigrams[0]), 2);
+ ndpi_xgrams_init(trigrams_bitmap,sizeof(trigrams_bitmap),
+ ndpi_en_trigrams,sizeof(ndpi_en_trigrams)/sizeof(ndpi_en_trigrams[0]), 3);
+ }
}
/* ******************************************************************** */
@@ -2305,9 +2327,6 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs
ndpi_str->host_automa.ac_automa = ac_automata_init(ac_match_handler);
ndpi_str->content_automa.ac_automa = ac_automata_init(ac_match_handler);
- ndpi_str->bigrams_automa.ac_automa = ac_automata_init(ac_match_handler);
- ndpi_str->trigrams_automa.ac_automa = ac_automata_init(ac_match_handler);
- ndpi_str->impossible_bigrams_automa.ac_automa = ac_automata_init(ac_match_handler);
ndpi_str->tls_cert_subject_automa.ac_automa = ac_automata_init(ac_match_handler);
ndpi_str->malicious_ja3_automa.ac_automa = NULL; /* Initialized on demand */
ndpi_str->malicious_sha1_automa.ac_automa = NULL; /* Initialized on demand */
@@ -2366,29 +2385,17 @@ void ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str)
break;
case 2:
- automa = &ndpi_str->bigrams_automa;
- break;
-
- case 3:
- automa = &ndpi_str->impossible_bigrams_automa;
- break;
-
- case 4:
automa = &ndpi_str->tls_cert_subject_automa;
break;
- case 5:
+ case 3:
automa = &ndpi_str->malicious_ja3_automa;
break;
- case 6:
+ case 4:
automa = &ndpi_str->malicious_sha1_automa;
break;
- case 7:
- automa = &ndpi_str->trigrams_automa;
- break;
-
default:
ndpi_str->ac_automa_finalized = 1;
return;
@@ -2665,15 +2672,6 @@ void ndpi_exit_detection_module(struct ndpi_detection_module_struct *ndpi_str) {
if(ndpi_str->content_automa.ac_automa != NULL)
ac_automata_release((AC_AUTOMATA_t *) ndpi_str->content_automa.ac_automa, 0);
- if(ndpi_str->bigrams_automa.ac_automa != NULL)
- ac_automata_release((AC_AUTOMATA_t *) ndpi_str->bigrams_automa.ac_automa, 0);
-
- if(ndpi_str->trigrams_automa.ac_automa != NULL)
- ac_automata_release((AC_AUTOMATA_t *) ndpi_str->trigrams_automa.ac_automa, 0);
-
- if(ndpi_str->impossible_bigrams_automa.ac_automa != NULL)
- ac_automata_release((AC_AUTOMATA_t *) ndpi_str->impossible_bigrams_automa.ac_automa, 0);
-
if(ndpi_str->risky_domain_automa.ac_automa != NULL)
ac_automata_release((AC_AUTOMATA_t *) ndpi_str->risky_domain_automa.ac_automa,
1 /* free patterns strings memory */);
@@ -6823,52 +6821,31 @@ u_int16_t ndpi_match_content_subprotocol(struct ndpi_detection_module_struct *nd
/* ****************************************************** */
-int ndpi_match_bigram(struct ndpi_detection_module_struct *ndpi_str,
- ndpi_automa *automa, char *bigram_to_match) {
-
- if((automa->ac_automa == NULL) || (bigram_to_match == NULL))
- return(-1);
-
- if(!ndpi_str->ac_automa_finalized) {
-#if 1
- ndpi_finalize_initialization(ndpi_str);
-#else
- printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initialization()\n", __FILE__, __LINE__);
- return(0); /* No matches */
-#endif
+static inline int ndpi_match_xgram(unsigned int *map,int l,const char *str) {
+ unsigned int i,c;
+ for(i=0,c=0; *str && i < l; i++) {
+ unsigned char a = (unsigned char)(*str++);
+ if(a < 'a' || a > 'z') return 0;
+ c *= XGRAMS_C;
+ c += a-'a';
}
- return ndpi_match_string_common(automa->ac_automa,bigram_to_match,2, NULL, NULL, NULL);
+ return (map[c >> 5] & (1u << (c & 0x1f))) != 0;
+}
+int ndpi_match_bigram(const char *str) {
+ return ndpi_match_xgram(bigrams_bitmap, 2, str);
}
-/* ****************************************************** */
-
-int ndpi_match_trigram(struct ndpi_detection_module_struct *ndpi_str,
- ndpi_automa *automa, char *trigram_to_match) {
- int rc;
-
- if((automa->ac_automa == NULL) || (trigram_to_match == NULL))
- return(-1);
+int ndpi_match_impossible_bigram(const char *str) {
+ return ndpi_match_xgram(imposible_bigrams_bitmap, 2, str);
+}
- if(!ndpi_str->ac_automa_finalized) {
-#if 1
- ndpi_finalize_initialization(ndpi_str);
-#else
- printf("[%s:%d] [NDPI] Internal error: please call ndpi_finalize_initialization()\n", __FILE__, __LINE__);
- return(0); /* No matches */
-#endif
- }
- rc = ndpi_match_string_common(automa->ac_automa,trigram_to_match,3, NULL, NULL, NULL);
- if(ndpi_verbose_dga_detection && rc) {
- printf("[%s:%d] [NDPI] Trigram %c%c%c\n",
- __FILE__, __LINE__,
- trigram_to_match[0],
- trigram_to_match[1],
- trigram_to_match[2]);
- }
+/* ****************************************************** */
- return(rc);
+int ndpi_match_trigram(const char *str) {
+ return ndpi_match_xgram(trigrams_bitmap, 3, str);
}
+
/* ****************************************************** */
void ndpi_free_flow(struct ndpi_flow_struct *flow) {
@@ -7506,16 +7483,14 @@ int ndpi_check_dga_name(struct ndpi_detection_module_struct *ndpi_str,
if(ndpi_verbose_dga_detection)
printf("-> Checking %c%c\n", word[i], word[i+1]);
- if(ndpi_match_bigram(ndpi_str,
- &ndpi_str->impossible_bigrams_automa,
- &word[i])) {
+ if(ndpi_match_impossible_bigram(&word[i])) {
if(ndpi_verbose_dga_detection)
printf("IMPOSSIBLE %s\n", &word[i]);
num_impossible++;
} else {
if(!skip_next_bigram) {
- if(ndpi_match_bigram(ndpi_str, &ndpi_str->bigrams_automa, &word[i])) {
+ if(ndpi_match_bigram(&word[i])) {
num_found++, skip_next_bigram = 1;
}
} else
@@ -7532,7 +7507,7 @@ int ndpi_check_dga_name(struct ndpi_detection_module_struct *ndpi_str,
} else {
num_trigram_checked++;
- if(ndpi_match_trigram(ndpi_str, &ndpi_str->trigrams_automa, &word[i]))
+ if(ndpi_match_trigram(&word[i]))
num_trigram_found++, trigram_char_skip = 2 /* 1 char overlap */;
else if(ndpi_verbose_dga_detection)
printf("[NDPI] NO Trigram %c%c%c\n", word[i], word[i+1], word[i+2]);
diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c
index f651ddd64..dddec3299 100644
--- a/src/lib/ndpi_utils.c
+++ b/src/lib/ndpi_utils.c
@@ -766,7 +766,7 @@ static int ndpi_find_non_eng_bigrams(struct ndpi_detection_module_struct *ndpi_s
s[0] = tolower(str[0]), s[1] = tolower(str[1]), s[2] = '\0';
- return(ndpi_match_bigram(ndpi_struct, &ndpi_struct->bigrams_automa, s));
+ return(ndpi_match_bigram(s));
}
/* ******************************************************************** */