aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2023-08-29 17:34:04 +0200
committerLuca Deri <deri@ntop.org>2023-08-29 17:34:04 +0200
commit36abf06c6f59b66bde48e7b3028b4823ecc6ed85 (patch)
tree5b31146feaff0ae0f032b64cd2954de60e270efe
parent1f693c3f5a5dcd9d69dffb610b9a81bd33f95382 (diff)
Swap from Aho-Corasick to an experimental/home-grown algorithm that uses a probabilistic
approach for handling Internet domain names.

To switch back to Aho-Corasick, edit ndpi_typedefs.h and uncomment the line
// #define USE_LEGACY_AHO_CORASICK

[1] With Aho-Corasick
$ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory
nDPI Memory statistics:
nDPI Memory (once): 37.34 KB
Flow Memory (per flow): 960 B
Actual Memory: 33.09 MB
Peak Memory: 33.09 MB

[2] With the new algorithm
$ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory
nDPI Memory statistics:
nDPI Memory (once): 37.31 KB
Flow Memory (per flow): 960 B
Actual Memory: 7.42 MB
Peak Memory: 7.42 MB

In essence, memory usage drops from ~33 MB to ~7 MB.

This new algorithm will enable larger lists to be loaded (e.g. the top 1M domains: https://s3-us-west-1.amazonaws.com/umbrella-static/index.html).

In ./lists there are files named <category id>_<string>.list (e.g. 107_gambling.list). With -G, ndpiReader can load all of them at startup.
-rw-r--r--example/ndpiReader.c53
-rw-r--r--lists/107_gambling.list (renamed from lists/gambling.list)0
-rw-r--r--lists/107_gambling_custom.list4
-rw-r--r--src/include/ndpi_api.h24
-rw-r--r--src/include/ndpi_protocol_ids.h2
-rw-r--r--src/include/ndpi_typedefs.h5
-rw-r--r--src/lib/ndpi_domain_classify.c70
-rw-r--r--src/lib/ndpi_main.c118
-rw-r--r--tests/cfgs/default/pcap/bets.pcapngbin0 -> 10740 bytes
-rw-r--r--tests/cfgs/default/result/bets.pcapng.out30
-rwxr-xr-xutils/gambling_sites_download.sh3
11 files changed, 268 insertions, 41 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 86452cbe1..65c07d2bc 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -80,6 +80,7 @@ static char *_customCategoryFilePath= NULL; /**< Custom categories file path */
static char *_maliciousJA3Path = NULL; /**< Malicious JA3 signatures */
static char *_maliciousSHA1Path = NULL; /**< Malicious SSL certificate SHA1 fingerprints */
static char *_riskyDomainFilePath = NULL; /**< Risky domain files */
+static char *_categoriesDirPath = NULL; /**< Directory containing domain files */
static u_int8_t live_capture = 0;
static u_int8_t undetected_flows_deleted = 0;
static FILE *csv_fp = NULL; /**< for CSV export */
@@ -543,6 +544,7 @@ static void help(u_int long_help) {
" -r <path> | Load risky domain file\n"
" -j <path> | Load malicious JA3 fingeprints\n"
" -S <path> | Load malicious SSL certificate SHA1 fingerprints\n"
+ " -G <dir> | Bind domain names to categories loading files from <dir>\n"
" -w <path> | Write test output on the specified file. This is useful for\n"
" | testing purposes in order to compare results across runs\n"
" -h | This help\n"
@@ -647,6 +649,7 @@ static struct option longopts[] = {
{ "filter", required_argument, NULL, 'f'},
{ "flow-stats", required_argument, NULL, 'F'},
{ "cpu-bind", required_argument, NULL, 'g'},
+ { "load-categories", required_argument, NULL, 'G'},
{ "loops", required_argument, NULL, 'l'},
{ "num-threads", required_argument, NULL, 'n'},
{ "ignore-vlanid", no_argument, NULL, 'I'},
@@ -965,7 +968,8 @@ static void parseOptions(int argc, char **argv) {
lru_cache_ttls[i] = -1; /* Use the default value */
}
- while((opt = getopt_long(argc, argv, "a:Ab:B:e:Ec:C:dDFf:g:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:X:w:zZ:q0123:456:7:89:m:MT:U:",
+ while((opt = getopt_long(argc, argv,
+ "a:Ab:B:e:Ec:C:dDFf:g:G:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:X:w:zZ:q0123:456:7:89:m:MT:U:",
longopts, &option_idx)) != EOF) {
#ifdef DEBUG_TRACE
if(trace) fprintf(trace, " #### Handling option -%c [%s] #### \n", opt, optarg ? optarg : "");
@@ -1035,6 +1039,10 @@ static void parseOptions(int argc, char **argv) {
#endif
#endif
+ case 'G':
+ _categoriesDirPath = optarg;
+ break;
+
case 'l':
num_loops = atoi(optarg);
break;
@@ -1107,6 +1115,7 @@ static void parseOptions(int argc, char **argv) {
module_tmp = ndpi_init_detection_module(0);
if(!module_tmp)
break;
+
NDPI_BITMASK_SET_ALL(all);
ndpi_set_protocol_detection_bitmask2(module_tmp, &all);
ndpi_finalize_initialization(module_tmp);
@@ -2646,21 +2655,18 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) {
exit(-1);
}
- ndpi_set_protocol_detection_bitmask2(ndpi_thread_info[thread_id].workflow->ndpi_struct, &enabled_bitmask);
-
- // clear memory for results
- memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter));
- memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes));
- memset(ndpi_thread_info[thread_id].workflow->stats.protocol_flows, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_flows));
- memset(ndpi_thread_info[thread_id].workflow->stats.flow_confidence, 0,
- sizeof(ndpi_thread_info[thread_id].workflow->stats.flow_confidence));
+ if(_categoriesDirPath)
+ ndpi_load_categories_dir(ndpi_thread_info[thread_id].workflow->ndpi_struct, _categoriesDirPath);
+
+ if(_riskyDomainFilePath)
+ ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath);
- if(_protoFilePath != NULL)
- ndpi_load_protocols_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _protoFilePath);
+ if(_maliciousJA3Path)
+ ndpi_load_malicious_ja3_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousJA3Path);
+ if(_maliciousSHA1Path)
+ ndpi_load_malicious_sha1_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousSHA1Path);
+
if(_customCategoryFilePath) {
char *label = strrchr(_customCategoryFilePath, '/');
@@ -2672,14 +2678,21 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) {
ndpi_load_categories_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _customCategoryFilePath, label);
}
- if(_riskyDomainFilePath)
- ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath);
+ /* Make sure to load lists before finalizing the initialization */
+ ndpi_set_protocol_detection_bitmask2(ndpi_thread_info[thread_id].workflow->ndpi_struct, &enabled_bitmask);
- if(_maliciousJA3Path)
- ndpi_load_malicious_ja3_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousJA3Path);
+ // clear memory for results
+ memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter));
+ memset(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_counter_bytes));
+ memset(ndpi_thread_info[thread_id].workflow->stats.protocol_flows, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.protocol_flows));
+ memset(ndpi_thread_info[thread_id].workflow->stats.flow_confidence, 0,
+ sizeof(ndpi_thread_info[thread_id].workflow->stats.flow_confidence));
- if(_maliciousSHA1Path)
- ndpi_load_malicious_sha1_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _maliciousSHA1Path);
+ if(_protoFilePath != NULL)
+ ndpi_load_protocols_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _protoFilePath);
/* Enable/disable/configure LRU caches size here */
for(i = 0; i < NDPI_LRUCACHE_MAX; i++) {
diff --git a/lists/gambling.list b/lists/107_gambling.list
index 5e4a6ae86..5e4a6ae86 100644
--- a/lists/gambling.list
+++ b/lists/107_gambling.list
diff --git a/lists/107_gambling_custom.list b/lists/107_gambling_custom.list
new file mode 100644
index 000000000..0a3f13dd2
--- /dev/null
+++ b/lists/107_gambling_custom.list
@@ -0,0 +1,4 @@
+#
+# Custom list (not built from Internet lists)
+#
+1084bets10.com
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 941578f47..7f4208ad0 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -808,6 +808,30 @@ extern "C" {
int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, const char* path, void *user_data);
/**
+ * Loads a file of domain names (one per line) and associates them with the specified category.
+ * Lines that are empty or that start with '#' are skipped.
+ *
+ * @par ndpi_str = the detection module
+ * @par path = the path of the file
+ * @par category_id = Id of the category to which domains will be associated
+ * @return the number of lines successfully loaded from the file;
+ * -1 if the detection module or the path is not usable, or the file cannot be opened
+ */
+ int ndpi_load_category_file(struct ndpi_detection_module_struct *ndpi_str,
+ char* path, ndpi_protocol_category_t category_id);
+
+ /**
+ * Load files (whose name is <categoryid>_<label>.list) stored
+ * in a directory and bind each domain to the category whose
+ * numeric id prefixes the file name. Other files are ignored.
+ *
+ * @par ndpi_str = the detection module
+ * @par path = the path of the directory containing the list files
+ * @return the number of list files picked up from the directory;
+ * -1 on failure (e.g. the directory cannot be opened)
+ */
+ int ndpi_load_categories_dir(struct ndpi_detection_module_struct *ndpi_str,
+ char* path);
+
+ /**
* Read a file and load the list of risky domains
*
* @par ndpi_mod = the detection module
diff --git a/src/include/ndpi_protocol_ids.h b/src/include/ndpi_protocol_ids.h
index dda9b7acd..d3db7726d 100644
--- a/src/include/ndpi_protocol_ids.h
+++ b/src/include/ndpi_protocol_ids.h
@@ -367,7 +367,7 @@ typedef enum {
NDPI_PROTOCOL_HOTS = 336, /* Heroes of the Storm */
NDPI_PROTOCOL_FACEBOOK_REEL_STORY = 337,
NDPI_PROTOCOL_SRTP = 338,
- NDPI_PROTOCOL_GAMBLING = 339,
+ NDPI_PROTOCOL_FREE = 339, /* Formerly used by gambling now a category. It can be reused in the future */
NDPI_PROTOCOL_EPICGAMES = 340,
NDPI_PROTOCOL_GEFORCENOW = 341,
NDPI_PROTOCOL_NVIDIA = 342,
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 523ba4aec..75df402ef 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -31,7 +31,7 @@
/* Used by both nDPI core and patricia code under third-party */
#include "ndpi_patricia_typedefs.h"
-#define USE_LEGACY_AHO_CORASICK
+// #define USE_LEGACY_AHO_CORASICK
#ifndef NDPI_CFFI_PREPROCESSING
#ifndef u_char
@@ -1101,6 +1101,9 @@ typedef enum {
Crypto Currency e.g Bitcoin, Litecoin, Etherum ..et.
*/
NDPI_PROTOCOL_CATEGORY_CRYPTO_CURRENCY = 106,
+
+ /* Gambling websites */
+ NDPI_PROTOCOL_CATEGORY_GAMBLING = 107,
/*
IMPORTANT
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index d94d692fa..8b66c940e 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -48,7 +48,8 @@ typedef struct {
ndpi_domain_classify_t *class[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
} ndpi_domain_classifications_t;
-//#define DEBUG
+// #define DEBUG_ADD
+// #define DEBUG_CONTAINS
/* ********************************************************** */
@@ -133,7 +134,14 @@ static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) {
if(domain == NULL) return(false);
if((len = strlen(domain)) == 0) return(false);
- if(domain[len-1] == '.') domain[len-1] = '0';
+
+ len--;
+ while((len > 0)
+ && ((domain[len] == '.')
+ || (domain[len] == '\n')
+ || (domain[len] == '\r'))
+ )
+ domain[len--] = '\0';
if(domain[0] == '.') ++domain;
@@ -178,7 +186,9 @@ static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain
u_int32_t bitmap_id = 0;
bool quit = false;
- elem = strrchr(domain, '.');
+ if((elem = strrchr(domain, '.')) == NULL)
+ return(false); /* This does not look like a domain */
+
while(elem) {
u_int32_t h;
@@ -263,8 +273,12 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
char *domain) {
u_int32_t i;
ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
- char buf[256];
+ char buf[256], *dot = strrchr(domain, '.');
+ if(!dot) return(false);
+ if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
+ return(false);
+
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->class[i] != NULL) {
if(s->class[i]->class_id == class_id) {
@@ -277,7 +291,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
return(false);
s->class[i]->class_id = class_id;
- s->class[i]->domains = ndpi_domain_search_alloc();
+ s->class[i]->domains = ndpi_domain_search_alloc();
break;
}
}
@@ -287,7 +301,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
snprintf(buf, sizeof(buf), "%s", domain);
-#ifdef DEBUG
+#ifdef DEBUG_ADD
printf("[add] %s @ %u\n", domain, class_id);
#endif
@@ -356,18 +370,48 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
/* ********************************************************** */
+/*
+  Tell whether c is acceptable as a domain name character: the letters
+  A-Z and a-z, the digits 0-9, underscore, dash and dot are accepted;
+  any other value is rejected.
+*/
+static bool is_valid_domain_char(u_char c) {
+  const bool is_letter = ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
+  const bool is_digit  = (c >= '0') && (c <= '9');
+  const bool is_symbol = (c == '_') || (c == '-') || (c == '.');
+
+  return(is_letter || is_digit || is_symbol);
+}
+
+/* ********************************************************** */
+
u_int16_t ndpi_domain_classify_contains(ndpi_domain_classify *_s,
char *domain) {
u_int32_t i, len;
ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+ char *dot;
- if(!domain) return(0);
- if((len = strlen(domain)) == 0) return(0);
+ if(!domain) return(0);
+ if((len = strlen(domain)) == 0) return(0);
+ if((dot = strrchr(domain, '.')) == NULL) return(0);
+ if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(0);
/* This is a number or a numeric IP or similar */
- if(isdigit(domain[len-1]) && isdigit(domain[0]))
+ if(isdigit(domain[len-1]) && isdigit(domain[0])) {
+#ifdef DEBUG_CONTAINS
+ printf("[contains] %s INVALID\n", domain);
+#endif
+
return(0);
+ }
+
+ if(!is_valid_domain_char(domain[0])) {
+#ifdef DEBUG_CONTAINS
+ printf("[contains] %s INVALID\n", domain);
+#endif
+ return(0);
+ }
+
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->class[i] != NULL) {
char buf[256];
@@ -375,16 +419,16 @@ u_int16_t ndpi_domain_classify_contains(ndpi_domain_classify *_s,
snprintf(buf, sizeof(buf), "%s", domain);
if(ndpi_domain_search_contains(s->class[i]->domains, buf)) {
-#ifdef DEBUG
- printf("[search] %s = %d\n", domain, s->class[i]->class_id);
+#ifdef DEBUG_CONTAINS
+ printf("[contains] %s = %d\n", domain, s->class[i]->class_id);
#endif
return(s->class[i]->class_id);
}
}
}
-#ifdef DEBUG
- printf("[search] %s NOT FOUND\n", domain);
+#ifdef DEBUG_CONTAINS
+ printf("[contains] %s NOT FOUND\n", domain);
#endif
return(0);
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 0df5e61f7..d1c4e3746 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -24,6 +24,7 @@
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
+#include <dirent.h>
#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN
@@ -2122,10 +2123,6 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp
"BITCOIN", NDPI_PROTOCOL_CATEGORY_CRYPTO_CURRENCY,
ndpi_build_default_ports(ports_a, 8333, 0, 0, 0, 0) /* TCP */,
ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
- ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_GAMBLING,
- "Gambling", NDPI_PROTOCOL_CATEGORY_WEB,
- ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */,
- ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_PROTONVPN,
"ProtonVPN", NDPI_PROTOCOL_CATEGORY_VPN,
ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */,
@@ -2147,6 +2144,10 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp
ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */,
ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
+ ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_FREE,
+ "Free", NDPI_PROTOCOL_CATEGORY_WEB,
+ ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */,
+ ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
#ifdef CUSTOM_NDPI_PROTOCOLS
#include "../../../nDPI-custom/custom_ndpi_main.c"
@@ -2783,6 +2784,7 @@ static const char *categories[] = {
"Allowed_Site",
"Antimalware",
"Crypto_Currency",
+ "Gambling"
};
#if !defined(NDPI_CFFI_PREPROCESSING) && defined(__linux__)
@@ -4144,13 +4146,119 @@ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str,
}
fclose(fd);
- ndpi_enable_loaded_categories(ndpi_str);
+
+ /*
+ Not necessary to call ndpi_enable_loaded_categories() as
+ ndpi_set_protocol_detection_bitmask2() will do that
+ */
+ /* ndpi_enable_loaded_categories(ndpi_str); */
return(num);
}
/* ******************************************************************** */
+/*
+  Read the text file `path` one line at a time and hand every line to
+  ndpi_load_category() so that it gets bound to `category_id`.
+
+  Lines holding at most one character (e.g. just the newline) and lines
+  starting with '#' are skipped.
+
+  Returns the number of lines for which ndpi_load_category() reported a
+  positive value, or -1 if an argument is NULL, the protocols tree is
+  missing, or the file cannot be opened.
+*/
+int ndpi_load_category_file(struct ndpi_detection_module_struct *ndpi_str,
+                            char *path, ndpi_protocol_category_t category_id) {
+  char row[256];
+  FILE *fp;
+  u_int loaded = 0;
+
+  if((ndpi_str == NULL) || (path == NULL) || (ndpi_str->protocols_ptree == NULL))
+    return(-1);
+
+#ifdef NDPI_ENABLE_DEBUG_MESSAGES
+  printf("Loading %s [proto %d]\n", path, category_id);
+#endif
+
+  if((fp = fopen(path, "r")) == NULL) {
+    NDPI_LOG_ERR(ndpi_str, "Unable to open file %s [%s]\n", path, strerror(errno));
+    return(-1);
+  }
+
+  /* fgets() keeps the trailing newline, hence the "<= 1" emptiness test */
+  while(fgets(row, sizeof(row), fp) != NULL) {
+    if((strlen(row) <= 1) || (row[0] == '#'))
+      continue;
+
+    if(ndpi_load_category(ndpi_str, row, category_id, NULL) > 0)
+      loaded++;
+  }
+
+  fclose(fp);
+  return(loaded);
+}
+
+/* ******************************************************************** */
+
+/*
+  Load files (whose name is <categoryid>_<label>.list) stored
+  in a directory and bind each domain to the specified category.
+
+  It can be used to load all files stored in the lists/ directory.
+
+  Entries are skipped when their name starts with '.', does not end
+  in ".list", has no '_' separator, or has a prefix before the first
+  '_' that is not a plain decimal id in the accepted range.
+
+  It returns the number of files that were loaded successfully, or -1
+  in case of failure (NULL argument or directory that cannot be opened).
+*/
+int ndpi_load_categories_dir(struct ndpi_detection_module_struct *ndpi_str,
+                             char *dir_path) {
+  DIR *dirp;
+  struct dirent *dp;
+  int rc = 0;
+
+  /* Validate the arguments before touching the filesystem */
+  if((ndpi_str == NULL) || (dir_path == NULL))
+    return(-1);
+
+  if((dirp = opendir(dir_path)) == NULL)
+    return(-1);
+
+  while((dp = readdir(dirp)) != NULL) {
+    char *underscore, *extn, *end = NULL;
+    char path[256];
+    long id;
+    int n;
+
+    if(dp->d_name[0] == '.') continue;
+
+    extn = strrchr(dp->d_name, '.');
+    if((extn == NULL) || strcmp(extn, ".list"))
+      continue;
+
+    /* Check if the format is <category id>_<string>.<extension> */
+    if((underscore = strchr(dp->d_name, '_')) == NULL)
+      continue;
+
+    /*
+      The numeric prefix must end exactly at the first '_': this rejects
+      names such as "12abc_x.list" without modifying dp->d_name.
+
+      NOTE(review): the id is a category id but the upper bound is the
+      protocol one (kept as in the original code) - confirm this is intended.
+    */
+    id = strtol(dp->d_name, &end, 10);
+    if((end != underscore) || (id <= 0) || (id >= NDPI_LAST_IMPLEMENTED_PROTOCOL))
+      continue;
+
+    /* Skip the entry rather than opening a truncated (hence wrong) path */
+    n = snprintf(path, sizeof(path), "%s/%s", dir_path, dp->d_name);
+    if((n < 0) || ((size_t)n >= sizeof(path)))
+      continue;
+
+    /* Count only the files that have actually been loaded */
+    if(ndpi_load_category_file(ndpi_str, path, (ndpi_protocol_category_t)id) >= 0)
+      rc++;
+  }
+
+  (void)closedir(dirp);
+
+  return(rc);
+}
+
+
+/* ******************************************************************** */
+
static int ndpi_load_risky_domain(struct ndpi_detection_module_struct *ndpi_str,
char* domain_name) {
if(ndpi_str->risky_domain_automa.ac_automa == NULL) {
diff --git a/tests/cfgs/default/pcap/bets.pcapng b/tests/cfgs/default/pcap/bets.pcapng
new file mode 100644
index 000000000..090cf6f22
--- /dev/null
+++ b/tests/cfgs/default/pcap/bets.pcapng
Binary files differ
diff --git a/tests/cfgs/default/result/bets.pcapng.out b/tests/cfgs/default/result/bets.pcapng.out
new file mode 100644
index 000000000..c17db6426
--- /dev/null
+++ b/tests/cfgs/default/result/bets.pcapng.out
@@ -0,0 +1,30 @@
+Guessed flow protos: 0
+
+DPI Packets (TCP): 6 (6.00 pkts/flow)
+Confidence DPI : 1 (flows)
+Num dissector calls: 1 (1.00 diss/flow)
+LRU cache ookla: 0/0/0 (insert/search/found)
+LRU cache bittorrent: 0/0/0 (insert/search/found)
+LRU cache zoom: 0/0/0 (insert/search/found)
+LRU cache stun: 0/0/0 (insert/search/found)
+LRU cache tls_cert: 0/2/0 (insert/search/found)
+LRU cache mining: 0/0/0 (insert/search/found)
+LRU cache msteams: 0/0/0 (insert/search/found)
+LRU cache stun_zoom: 0/0/0 (insert/search/found)
+Automa host: 1/0 (search/found)
+Automa domain: 1/0 (search/found)
+Automa tls cert: 0/0 (search/found)
+Automa risk mask: 0/0 (search/found)
+Automa common alpns: 2/2 (search/found)
+Patricia risk mask: 0/0 (search/found)
+Patricia risk: 0/0 (search/found)
+Patricia protocols: 1/1 (search/found)
+
+TLS 33 9228 1
+
+JA3 Host Stats:
+ IP Address # JA3C
+ 1 192.168.10.2 1
+
+
+ 1 TCP 192.168.10.2:60099 <-> 13.224.103.22:443 [proto: 91/TLS][IP: 265/AmazonAWS][Encrypted][Confidence: DPI][DPI packets: 6][cat: Gambling/107][17 pkts/1469 bytes <-> 16 pkts/7759 bytes][Goodput ratio: 39/89][0.19 sec][Hostname/SNI: www.1084bets10.com][(Advertised) ALPNs: h2;http/1.1][TLS Supported Versions: TLSv1.3;TLSv1.2;TLSv1.1;TLSv1][bytes ratio: -0.682 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 7/9 46/45 16/17][Pkt Len c2s/s2c min/avg/max/stddev: 52/52 86/485 380/1420 78/609][TLSv1.3][JA3C: 375c6162a492dfbf2795909110ce8424][JA3S: f4febc55ea12b31ae17cfb7e614afda8][Firefox][Cipher: TLS_AES_128_GCM_SHA256][Plen Bins: 14,14,21,0,7,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,28,0,0,0,0,0]
diff --git a/utils/gambling_sites_download.sh b/utils/gambling_sites_download.sh
index 135e77889..82101e516 100755
--- a/utils/gambling_sites_download.sh
+++ b/utils/gambling_sites_download.sh
@@ -5,7 +5,8 @@ set -e
cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1
-LIST=../lists/gambling.list
+# NDPI_PROTOCOL_CATEGORY_GAMBLING = 107
+LIST=../lists/107_gambling.list
printf '(1) %s\n' "Scraping Illegal Gambling Sites (Belgium)"
DOMAINS="$(curl -s 'https://www.gamingcommission.be/en/gaming-commission/illegal-games-of-chance/list-of-illegal-gambling-sites' | sed -n 's/^<td[^>]\+>\(.\+\.[a-zA-Z0-9]\+\)\(\|\/.*[^<]*\)<\/td>/\1/gp' || exit 1)"