From 36abf06c6f59b66bde48e7b3028b4823ecc6ed85 Mon Sep 17 00:00:00 2001 From: Luca Deri Date: Tue, 29 Aug 2023 17:34:04 +0200 Subject: Swap from Aho-Corasick to an experimental/home-grown algorithm that uses a probabilistic approach for handling Internet domain names. For switching back to Aho-Corasick it is necessary to edit ndpi_typedefs.h and uncomment the line // #define USE_LEGACY_AHO_CORASICK [1] With Aho-Corasick $ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory nDPI Memory statistics: nDPI Memory (once): 37.34 KB Flow Memory (per flow): 960 B Actual Memory: 33.09 MB Peak Memory: 33.09 MB [2] With the new algorithm $ ./example/ndpiReader -G ./lists/ -i tests/pcap/ookla.pcap | grep Memory nDPI Memory statistics: nDPI Memory (once): 37.31 KB Flow Memory (per flow): 960 B Actual Memory: 7.42 MB Peak Memory: 7.42 MB In essence, from ~33 MB to ~7 MB. This new algorithm will enable larger lists to be loaded (e.g. top 1M domains https://s3-us-west-1.amazonaws.com/umbrella-static/index.html). In ./lists there are files that are named as <category_id>_<name>.list. With -G, ndpiReader can load all of them at startup. --- src/include/ndpi_api.h | 24 ++++++++ src/include/ndpi_protocol_ids.h | 2 +- src/include/ndpi_typedefs.h | 5 +- src/lib/ndpi_domain_classify.c | 70 +++++++++++++++++++----- src/lib/ndpi_main.c | 118 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 199 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index 941578f47..7f4208ad0 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -807,6 +807,30 @@ extern "C" { */ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, const char* path, void *user_data); + /** + * Loads a file (separated by <cr>) of domain names associated with the specified category + * + * @par ndpi_mod = the detection module + * @par path = the path of the file + * @par category_id = Id of the category to which domains will be 
associated + * @return 0 if the file is loaded correctly; + * -1 else + */ + int ndpi_load_category_file(struct ndpi_detection_module_struct *ndpi_str, + char* path, ndpi_protocol_category_t category_id); + + /** + * Load files (whose name is _