diff options
author | Luca Deri <deri@ntop.org> | 2023-08-29 17:34:04 +0200 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2023-08-29 17:34:04 +0200 |
commit | 36abf06c6f59b66bde48e7b3028b4823ecc6ed85 (patch) | |
tree | 5b31146feaff0ae0f032b64cd2954de60e270efe /src/lib/ndpi_main.c | |
parent | 1f693c3f5a5dcd9d69dffb610b9a81bd33f95382 (diff) |
Swap from Aho-Corasick to an experimental/home-grown algorithm that uses a probabilistic
approach for handling Internet domain names.
To switch back to Aho-Corasick, edit
ndpi-typedefs.h and uncomment the line
// #define USE_LEGACY_AHO_CORASICK
[1] With Aho-Corasick
$ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory
nDPI Memory statistics:
nDPI Memory (once): 37.34 KB
Flow Memory (per flow): 960 B
Actual Memory: 33.09 MB
Peak Memory: 33.09 MB
[2] With the new algorithm
$ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory
nDPI Memory statistics:
nDPI Memory (once): 37.31 KB
Flow Memory (per flow): 960 B
Actual Memory: 7.42 MB
Peak Memory: 7.42 MB
In essence, memory usage drops from ~33 MB to ~7 MB.
This new algorithm will enable larger lists to be loaded (e.g. top 1M domains
https://s3-us-west-1.amazonaws.com/umbrella-static/index.html)
In ./lists there are files named <category>_<string>.list
With the -G option, ndpiReader loads all of them at startup
Diffstat (limited to 'src/lib/ndpi_main.c')
-rw-r--r-- | src/lib/ndpi_main.c | 118 |
1 files changed, 113 insertions, 5 deletions
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 0df5e61f7..d1c4e3746 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -24,6 +24,7 @@ #include <stdlib.h> #include <errno.h> #include <sys/types.h> +#include <dirent.h> #define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN @@ -2122,10 +2123,6 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp "BITCOIN", NDPI_PROTOCOL_CATEGORY_CRYPTO_CURRENCY, ndpi_build_default_ports(ports_a, 8333, 0, 0, 0, 0) /* TCP */, ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); - ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_GAMBLING, - "Gambling", NDPI_PROTOCOL_CATEGORY_WEB, - ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, - ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_PROTONVPN, "ProtonVPN", NDPI_PROTOCOL_CATEGORY_VPN, ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, @@ -2147,6 +2144,10 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); + ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_FREE, + "Free", NDPI_PROTOCOL_CATEGORY_WEB, + ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, + ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); #ifdef CUSTOM_NDPI_PROTOCOLS #include "../../../nDPI-custom/custom_ndpi_main.c" @@ -2783,6 +2784,7 @@ static const char *categories[] = { "Allowed_Site", "Antimalware", "Crypto_Currency", + "Gambling" }; #if !defined(NDPI_CFFI_PREPROCESSING) && defined(__linux__) @@ -4144,13 +4146,119 @@ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, } fclose(fd); - 
ndpi_enable_loaded_categories(ndpi_str); + + /* + Not necessay to call ndpi_enable_loaded_categories() as + ndpi_set_protocol_detection_bitmask2() will do that + */ + /* ndpi_enable_loaded_categories(ndpi_str); */ return(num); } /* ******************************************************************** */ +/* + Loads a file (separated by <cr>) of domain names associated with the + specified category +*/ +int ndpi_load_category_file(struct ndpi_detection_module_struct *ndpi_str, + char *path, ndpi_protocol_category_t category_id) { + char buffer[256], *line; + FILE *fd; + u_int num_loaded = 0; + + if(!ndpi_str || !path || !ndpi_str->protocols_ptree) + return(-1); + +#ifdef NDPI_ENABLE_DEBUG_MESSAGES + printf("Loading %s [proto %d]\n", path, category_id); +#endif + + fd = fopen(path, "r"); + + if(fd == NULL) { + NDPI_LOG_ERR(ndpi_str, "Unable to open file %s [%s]\n", path, strerror(errno)); + return(-1); + } + + while(1) { + int len; + + line = fgets(buffer, sizeof(buffer), fd); + + if(line == NULL) + break; + + len = strlen(line); + + if((len <= 1) || (line[0] == '#')) + continue; + + if(ndpi_load_category(ndpi_str, line, category_id, NULL) > 0) + num_loaded++; + } + + fclose(fd); + return(num_loaded); +} + +/* ******************************************************************** */ + +/* + Load files (whose name is <categoryid>_<label>.<extension>) stored + in a directory and bind each domain to the specified category. 
+ + It can be used to load all files store in the lists/ directory + + It returns the number of loaded files or -1 in case of failure +*/ +int ndpi_load_categories_dir(struct ndpi_detection_module_struct *ndpi_str, + char *dir_path) { + DIR *dirp = opendir(dir_path); + struct dirent *dp; + int rc = 0; + + if (dirp == NULL) + return(-1); + + while((dp = readdir(dirp)) != NULL) { + char *underscore, *extn; + + if(dp->d_name[0] == '.') continue; + extn = strrchr(dp->d_name, '.'); + + if((extn == NULL) || strcmp(extn, ".list")) + continue; + + /* Check if the format is <proto it>_<string>.<extension> */ + if((underscore = strchr(dp->d_name, '_')) != NULL) { + ndpi_protocol_category_t proto_id; + + underscore[0] = '\0'; + proto_id = (ndpi_protocol_category_t)atoi(dp->d_name); + + if((proto_id > 0) && (proto_id < NDPI_LAST_IMPLEMENTED_PROTOCOL)) { + /* Valid file */ + char path[256]; + + underscore[0] = '_'; + snprintf(path, sizeof(path), "%s/%s", dir_path, dp->d_name); + + ndpi_load_category_file(ndpi_str, path, proto_id); + rc++; + } + } + } + + (void)closedir(dirp); + + return(rc); +} + + +/* ******************************************************************** */ + static int ndpi_load_risky_domain(struct ndpi_detection_module_struct *ndpi_str, char* domain_name) { if(ndpi_str->risky_domain_automa.ac_automa == NULL) { |