From 36abf06c6f59b66bde48e7b3028b4823ecc6ed85 Mon Sep 17 00:00:00 2001 From: Luca Deri Date: Tue, 29 Aug 2023 17:34:04 +0200 Subject: Swap from Aho-Corasick to an experimental/home-grown algorithm that uses a probabilistic approach for handling Internet domain names. For switching back to Aho-Corasick it is necessary to edit ndpi-typedefs.h and uncomment the line // #define USE_LEGACY_AHO_CORASICK [1] With Aho-Corasick $ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory nDPI Memory statistics: nDPI Memory (once): 37.34 KB Flow Memory (per flow): 960 B Actual Memory: 33.09 MB Peak Memory: 33.09 MB [2] With the new algorithm $ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory nDPI Memory statistics: nDPI Memory (once): 37.31 KB Flow Memory (per flow): 960 B Actual Memory: 7.42 MB Peak Memory: 7.42 MB In essence from ~33 MB to ~7 MB This new algorithm will enable larger lists to be loaded (e.g. top 1M domains https://s3-us-west-1.amazonaws.com/umbrella-static/index.html) In ./lists there are file names that are named as _.list With -G ndpiReader can load all of them at startup --- src/lib/ndpi_main.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 113 insertions(+), 5 deletions(-) (limited to 'src/lib/ndpi_main.c') diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 0df5e61f7..d1c4e3746 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN @@ -2122,10 +2123,6 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp "BITCOIN", NDPI_PROTOCOL_CATEGORY_CRYPTO_CURRENCY, ndpi_build_default_ports(ports_a, 8333, 0, 0, 0, 0) /* TCP */, ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); - ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_GAMBLING, - "Gambling", NDPI_PROTOCOL_CATEGORY_WEB, - 
ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, - ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_PROTONVPN, "ProtonVPN", NDPI_PROTOCOL_CATEGORY_VPN, ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, @@ -2147,6 +2144,10 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); + ndpi_set_proto_defaults(ndpi_str, 0 /* encrypted */, 1 /* app proto */, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_FREE, + "Free", NDPI_PROTOCOL_CATEGORY_WEB, + ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */, + ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */); #ifdef CUSTOM_NDPI_PROTOCOLS #include "../../../nDPI-custom/custom_ndpi_main.c" @@ -2783,6 +2784,7 @@ static const char *categories[] = { "Allowed_Site", "Antimalware", "Crypto_Currency", + "Gambling" }; #if !defined(NDPI_CFFI_PREPROCESSING) && defined(__linux__) @@ -4144,11 +4146,117 @@ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, } fclose(fd); - ndpi_enable_loaded_categories(ndpi_str); + + /* + Not necessary to call ndpi_enable_loaded_categories() as + ndpi_set_protocol_detection_bitmask2() will do that + */ + /* ndpi_enable_loaded_categories(ndpi_str); */ return(num); } +/* ******************************************************************** */ + +/* + Loads a text file of domain names, read one line at a time, and passes each + line to ndpi_load_category() together with the specified category. + Lines of at most one character and lines starting with a hash sign are skipped. + Returns the number of lines for which ndpi_load_category() returned a positive value, + or -1 when ndpi_str, path or ndpi_str->protocols_ptree is NULL or the file cannot be opened. + NOTE(review): fgets() keeps the trailing newline and the line is handed over unmodified; + confirm that ndpi_load_category() strips it. + NOTE(review): lines longer than the 256-byte buffer are returned by fgets() in several pieces, + each piece being treated as a separate entry. +*/ +int ndpi_load_category_file(struct ndpi_detection_module_struct *ndpi_str, + char *path, ndpi_protocol_category_t category_id) { + char buffer[256], *line; + FILE *fd; + u_int num_loaded = 0; + + if(!ndpi_str || !path || !ndpi_str->protocols_ptree) + return(-1); + +#ifdef NDPI_ENABLE_DEBUG_MESSAGES + printf("Loading %s [proto %d]\n", path, category_id); 
+#endif + + fd = fopen(path, "r"); + + if(fd == NULL) { + NDPI_LOG_ERR(ndpi_str, "Unable to open file %s [%s]\n", path, strerror(errno)); + return(-1); + } + + while(1) { + int len; + + line = fgets(buffer, sizeof(buffer), fd); + + if(line == NULL) + break; + + len = strlen(line); + + if((len <= 1) || (line[0] == '#')) + continue; + + if(ndpi_load_category(ndpi_str, line, category_id, NULL) > 0) + num_loaded++; + } + + fclose(fd); + return(num_loaded); +} + +/* ******************************************************************** */ + +/* + Load files (whose name is _