From 36abf06c6f59b66bde48e7b3028b4823ecc6ed85 Mon Sep 17 00:00:00 2001 From: Luca Deri Date: Tue, 29 Aug 2023 17:34:04 +0200 Subject: Swap from Aho-Corasick to an experimental/home-grown algorithm that uses a probabilistic approach for handling Internet domain names. For switching back to Aho-Corasick it is necessary to edit ndpi-typedefs.h and uncomment the line // #define USE_LEGACY_AHO_CORASICK [1] With Aho-Corasick $ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory nDPI Memory statistics: nDPI Memory (once): 37.34 KB Flow Memory (per flow): 960 B Actual Memory: 33.09 MB Peak Memory: 33.09 MB [2] With the new algorithm $ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory nDPI Memory statistics: nDPI Memory (once): 37.31 KB Flow Memory (per flow): 960 B Actual Memory: 7.42 MB Peak Memory: 7.42 MB In essence from ~33 MB to ~7 MB This new algorithm will enable larger lists to be loaded (e.g. top 1M domains https://s3-us-west-1.amazonaws.com/umbrella-static/index.html) In ./lists there are file names that are named as _.list With -G ndpiReader can load all of them at startup --- example/ndpiReader.c | 53 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'example/ndpiReader.c') diff --git a/example/ndpiReader.c b/example/ndpiReader.c index 86452cbe1..65c07d2bc 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -80,6 +80,7 @@ static char *_customCategoryFilePath= NULL; /**< Custom categories file path */ static char *_maliciousJA3Path = NULL; /**< Malicious JA3 signatures */ static char *_maliciousSHA1Path = NULL; /**< Malicious SSL certificate SHA1 fingerprints */ static char *_riskyDomainFilePath = NULL; /**< Risky domain files */ +static char *_categoriesDirPath = NULL; /**< Directory containing domain files */ static u_int8_t live_capture = 0; static u_int8_t undetected_flows_deleted = 0; static FILE *csv_fp = NULL; /**< for CSV export */ @@ 
-543,6 +544,7 @@ static void help(u_int long_help) { " -r | Load risky domain file\n" " -j | Load malicious JA3 fingeprints\n" " -S | Load malicious SSL certificate SHA1 fingerprints\n" + " -G | Bind domain names to categories loading files from \n" " -w | Write test output on the specified file. This is useful for\n" " | testing purposes in order to compare results across runs\n" " -h | This help\n" @@ -647,6 +649,7 @@ static struct option longopts[] = { { "filter", required_argument, NULL, 'f'}, { "flow-stats", required_argument, NULL, 'F'}, { "cpu-bind", required_argument, NULL, 'g'}, + { "load-categories", required_argument, NULL, 'G'}, { "loops", required_argument, NULL, 'l'}, { "num-threads", required_argument, NULL, 'n'}, { "ignore-vlanid", no_argument, NULL, 'I'}, @@ -965,7 +968,8 @@ static void parseOptions(int argc, char **argv) { lru_cache_ttls[i] = -1; /* Use the default value */ } - while((opt = getopt_long(argc, argv, "a:Ab:B:e:Ec:C:dDFf:g:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:X:w:zZ:q0123:456:7:89:m:MT:U:", + while((opt = getopt_long(argc, argv, + "a:Ab:B:e:Ec:C:dDFf:g:G:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:X:w:zZ:q0123:456:7:89:m:MT:U:", longopts, &option_idx)) != EOF) { #ifdef DEBUG_TRACE if(trace) fprintf(trace, " #### Handling option -%c [%s] #### \n", opt, optarg ? 
optarg : ""); @@ -1035,6 +1039,10 @@ static void parseOptions(int argc, char **argv) { #endif #endif + case 'G': + _categoriesDirPath = optarg; + break; + case 'l': num_loops = atoi(optarg); break; @@ -1107,6 +1115,7 @@ static void parseOptions(int argc, char **argv) { module_tmp = ndpi_init_detection_module(0); if(!module_tmp) break; + NDPI_BITMASK_SET_ALL(all); ndpi_set_protocol_detection_bitmask2(module_tmp, &all); ndpi_finalize_initialization(module_tmp); @@ -2646,21 +2655,18 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) { exit(-1); } - ndpi_set_protocol_detection_bitmask2(ndpi_thread_info[thread_id].workflow->ndpi_struct, &enabled_bitmask); - - // clear memory for results - memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter, 0, - sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter)); - memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes, 0, - sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes)); - memset(ndpi_thread_info[thread_id].workflow->stats.protocol_flows, 0, - sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_flows)); - memset(ndpi_thread_info[thread_id].workflow->stats.flow_confidence, 0, - sizeof(ndpi_thread_info[thread_id].workflow->stats.flow_confidence)); + if(_categoriesDirPath) + ndpi_load_categories_dir(ndpi_thread_info[thread_id].workflow->ndpi_struct, _categoriesDirPath); + + if(_riskyDomainFilePath) + ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath); - if(_protoFilePath != NULL) - ndpi_load_protocols_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _protoFilePath); + if(_maliciousJA3Path) + ndpi_load_malicious_ja3_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousJA3Path); + if(_maliciousSHA1Path) + ndpi_load_malicious_sha1_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousSHA1Path); + if(_customCategoryFilePath) { char *label = 
strrchr(_customCategoryFilePath, '/'); @@ -2672,14 +2678,21 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) { ndpi_load_categories_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _customCategoryFilePath, label); } - if(_riskyDomainFilePath) - ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath); + /* Make sure to load lists before finalizing the initialization */ + ndpi_set_protocol_detection_bitmask2(ndpi_thread_info[thread_id].workflow->ndpi_struct, &enabled_bitmask); - if(_maliciousJA3Path) - ndpi_load_malicious_ja3_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousJA3Path); + // clear memory for results + memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter, 0, + sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter)); + memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes, 0, + sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes)); + memset(ndpi_thread_info[thread_id].workflow->stats.protocol_flows, 0, + sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_flows)); + memset(ndpi_thread_info[thread_id].workflow->stats.flow_confidence, 0, + sizeof(ndpi_thread_info[thread_id].workflow->stats.flow_confidence)); - if(_maliciousSHA1Path) - ndpi_load_malicious_sha1_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousSHA1Path); + if(_protoFilePath != NULL) + ndpi_load_protocols_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _protoFilePath); /* Enable/disable/configure LRU caches size here */ for(i = 0; i < NDPI_LRUCACHE_MAX; i++) { -- cgit v1.2.3