aboutsummaryrefslogtreecommitdiff
path: root/example/ndpiReader.c
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2023-08-29 17:34:04 +0200
committerLuca Deri <deri@ntop.org>2023-08-29 17:34:04 +0200
commit36abf06c6f59b66bde48e7b3028b4823ecc6ed85 (patch)
tree5b31146feaff0ae0f032b64cd2954de60e270efe /example/ndpiReader.c
parent1f693c3f5a5dcd9d69dffb610b9a81bd33f95382 (diff)
Swap from Aho-Corasick to an experimental/home-grown algorithm that uses a probabilistic
approach for handling Internet domain names.

To switch back to Aho-Corasick, edit ndpi-typedefs.h and uncomment the line
// #define USE_LEGACY_AHO_CORASICK

[1] With Aho-Corasick
$ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory
nDPI Memory statistics:
nDPI Memory (once): 37.34 KB
Flow Memory (per flow): 960 B
Actual Memory: 33.09 MB
Peak Memory: 33.09 MB

[2] With the new algorithm
$ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory
nDPI Memory statistics:
nDPI Memory (once): 37.31 KB
Flow Memory (per flow): 960 B
Actual Memory: 7.42 MB
Peak Memory: 7.42 MB

In essence, memory usage drops from ~33 MB to ~7 MB.

This new algorithm will enable larger lists to be loaded (e.g. the top 1M domains: https://s3-us-west-1.amazonaws.com/umbrella-static/index.html).

The files in ./lists are named <category>_<string>.list. With -G, ndpiReader can load all of them at startup.
Diffstat (limited to 'example/ndpiReader.c')
-rw-r--r--example/ndpiReader.c53
1 files changed, 33 insertions, 20 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 86452cbe1..65c07d2bc 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -80,6 +80,7 @@ static char *_customCategoryFilePath= NULL; /**< Custom categories file path */
static char *_maliciousJA3Path = NULL; /**< Malicious JA3 signatures */
static char *_maliciousSHA1Path = NULL; /**< Malicious SSL certificate SHA1 fingerprints */
static char *_riskyDomainFilePath = NULL; /**< Risky domain files */
+static char *_categoriesDirPath = NULL; /**< Directory containing domain files */
static u_int8_t live_capture = 0;
static u_int8_t undetected_flows_deleted = 0;
static FILE *csv_fp = NULL; /**< for CSV export */
@@ -543,6 +544,7 @@ static void help(u_int long_help) {
" -r <path> | Load risky domain file\n"
" -j <path> | Load malicious JA3 fingeprints\n"
" -S <path> | Load malicious SSL certificate SHA1 fingerprints\n"
+ " -G <dir> | Bind domain names to categories loading files from <dir>\n"
" -w <path> | Write test output on the specified file. This is useful for\n"
" | testing purposes in order to compare results across runs\n"
" -h | This help\n"
@@ -647,6 +649,7 @@ static struct option longopts[] = {
{ "filter", required_argument, NULL, 'f'},
{ "flow-stats", required_argument, NULL, 'F'},
{ "cpu-bind", required_argument, NULL, 'g'},
+ { "load-categories", required_argument, NULL, 'G'},
{ "loops", required_argument, NULL, 'l'},
{ "num-threads", required_argument, NULL, 'n'},
{ "ignore-vlanid", no_argument, NULL, 'I'},
@@ -965,7 +968,8 @@ static void parseOptions(int argc, char **argv) {
lru_cache_ttls[i] = -1; /* Use the default value */
}
- while((opt = getopt_long(argc, argv, "a:Ab:B:e:Ec:C:dDFf:g:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:X:w:zZ:q0123:456:7:89:m:MT:U:",
+ while((opt = getopt_long(argc, argv,
+ "a:Ab:B:e:Ec:C:dDFf:g:G:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:X:w:zZ:q0123:456:7:89:m:MT:U:",
longopts, &option_idx)) != EOF) {
#ifdef DEBUG_TRACE
if(trace) fprintf(trace, " #### Handling option -%c [%s] #### \n", opt, optarg ? optarg : "");
@@ -1035,6 +1039,10 @@ static void parseOptions(int argc, char **argv) {
#endif
#endif
+ case 'G':
+ _categoriesDirPath = optarg;
+ break;
+
case 'l':
num_loops = atoi(optarg);
break;
@@ -1107,6 +1115,7 @@ static void parseOptions(int argc, char **argv) {
module_tmp = ndpi_init_detection_module(0);
if(!module_tmp)
break;
+
NDPI_BITMASK_SET_ALL(all);
ndpi_set_protocol_detection_bitmask2(module_tmp, &all);
ndpi_finalize_initialization(module_tmp);
@@ -2646,21 +2655,18 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) {
exit(-1);
}
- ndpi_set_protocol_detection_bitmask2(ndpi_thread_info[thread_id].workflow->ndpi_struct, &enabled_bitmask);
-
- // clear memory for results
- memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter));
- memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes));
- memset(ndpi_thread_info[thread_id].workflow->stats.protocol_flows, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_flows));
- memset(ndpi_thread_info[thread_id].workflow->stats.flow_confidence, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.flow_confidence));
+ if(_categoriesDirPath)
+ ndpi_load_categories_dir(ndpi_thread_info[thread_id].workflow->ndpi_struct, _categoriesDirPath);
+
+ if(_riskyDomainFilePath)
+ ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath);
- if(_protoFilePath != NULL)
- ndpi_load_protocols_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _protoFilePath);
+ if(_maliciousJA3Path)
+ ndpi_load_malicious_ja3_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousJA3Path);
+ if(_maliciousSHA1Path)
+ ndpi_load_malicious_sha1_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousSHA1Path);
+
if(_customCategoryFilePath) {
char *label = strrchr(_customCategoryFilePath, '/');
@@ -2672,14 +2678,21 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) {
ndpi_load_categories_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _customCategoryFilePath, label);
}
- if(_riskyDomainFilePath)
- ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath);
+ /* Make sure to load lists before finalizing the initialization */
+ ndpi_set_protocol_detection_bitmask2(ndpi_thread_info[thread_id].workflow->ndpi_struct, &enabled_bitmask);
- if(_maliciousJA3Path)
- ndpi_load_malicious_ja3_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousJA3Path);
+ // clear memory for results
+ memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter));
+ memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes));
+ memset(ndpi_thread_info[thread_id].workflow->stats.protocol_flows, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_flows));
+ memset(ndpi_thread_info[thread_id].workflow->stats.flow_confidence, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.flow_confidence));
- if(_maliciousSHA1Path)
- ndpi_load_malicious_sha1_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousSHA1Path);
+ if(_protoFilePath != NULL)
+ ndpi_load_protocols_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _protoFilePath);
/* Enable/disable/configure LRU caches size here */
for(i = 0; i < NDPI_LRUCACHE_MAX; i++) {