diff options
author | Luca Deri <lucaderi@users.noreply.github.com> | 2024-04-18 23:21:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-18 23:21:40 +0200 |
commit | ad117bfaabd3bc75dc70d0ddbc4ba18c86c40dbd (patch) | |
tree | 3b1fb6016da1e114bca190ed6a868421fd9c32f1 /example/ndpiReader.c | |
parent | 108b8331d5b345e110c9ef110a6aa95a2767a640 (diff) |
Domain Classification Improvements (#2396)
* Added
size_t ndpi_compress_str(const char * in, size_t len, char * out, size_t bufsize);
size_t ndpi_decompress_str(const char * in, size_t len, char * out, size_t bufsize);
used to compress and decompress short strings such as domain names. This code is based on
https://github.com/Ed-von-Schleck/shoco
* Major code rewrite for ndpi_hash and ndpi_domain_classify
* Improvements to make sure custom categories are loaded and enabled
* Fixed string encoding
* Extended SalesForce/Cloudflare domains list
Diffstat (limited to 'example/ndpiReader.c')
-rw-r--r-- | example/ndpiReader.c | 160 |
1 file changed, 106 insertions, 54 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index 2621d0465..73d469952 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -1,7 +1,7 @@ /* * ndpiReader.c * - * Copyright (C) 2011-23 - ntop.org + * Copyright (C) 2011-24 - ntop.org * * nDPI is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -377,21 +377,21 @@ void ndpiCheckHostStringMatch(char *testChar) { testChar, strlen(testChar), &match); if(testRes) { - memset( &detected_protocol, 0, sizeof(ndpi_protocol) ); + memset(&detected_protocol, 0, sizeof(ndpi_protocol) ); detected_protocol.app_protocol = match.protocol_id; detected_protocol.master_protocol = 0; detected_protocol.category = match.protocol_category; - ndpi_protocol2name( ndpi_str, detected_protocol, appBufStr, - sizeof(appBufStr)); + ndpi_protocol2name(ndpi_str, detected_protocol, appBufStr, + sizeof(appBufStr)); printf("Match Found for string [%s] -> P(%d) B(%d) C(%d) => %s %s %s\n", testChar, match.protocol_id, match.protocol_breed, match.protocol_category, appBufStr, - ndpi_get_proto_breed_name( match.protocol_breed ), - ndpi_category_get_name( ndpi_str, match.protocol_category)); + ndpi_get_proto_breed_name(match.protocol_breed ), + ndpi_category_get_name(ndpi_str, match.protocol_category)); } else printf("Match NOT Found for string: %s\n\n", testChar ); @@ -675,17 +675,17 @@ static void help(u_int long_help) { max_num_reported_top_payloads, max_num_tcp_dissected_pkts, max_num_udp_dissected_pkts); NDPI_PROTOCOL_BITMASK all; - struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL); + struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL); NDPI_BITMASK_SET_ALL(all); - ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all); + ndpi_set_protocol_detection_bitmask2(ndpi_str, &all); if(_protoFilePath != NULL) - ndpi_load_protocols_file(ndpi_info_mod, _protoFilePath); + 
ndpi_load_protocols_file(ndpi_str, _protoFilePath); - ndpi_finalize_initialization(ndpi_info_mod); + ndpi_finalize_initialization(ndpi_str); printf("\nProtocols configuration parameters:\n"); - ndpi_dump_config(ndpi_info_mod, stdout); + ndpi_dump_config(ndpi_str, stdout); #ifndef WIN32 printf("\nExcap (wireshark) options:\n" @@ -712,13 +712,13 @@ static void help(u_int long_help) { "Id", "Userd-id", "Protocol", "Layer_4", "Nw_Proto", "Breed", "Category"); num_threads = 1; - ndpi_dump_protocols(ndpi_info_mod, stdout); + ndpi_dump_protocols(ndpi_str, stdout); printf("\n\nnDPI supported risks:\n"); ndpi_dump_risks_score(stdout); } - ndpi_exit_detection_module(ndpi_info_mod); + ndpi_exit_detection_module(ndpi_str); exit(!long_help); } @@ -839,10 +839,10 @@ void extcap_config() { ndpi_proto_defaults_t *proto_defaults; #endif - struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL); + struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL); #if 0 - ndpi_num_supported_protocols = ndpi_get_ndpi_num_supported_protocols(ndpi_info_mod); - proto_defaults = ndpi_get_proto_defaults(ndpi_info_mod); + ndpi_num_supported_protocols = ndpi_get_ndpi_num_supported_protocols(ndpi_str); + proto_defaults = ndpi_get_proto_defaults(ndpi_str); #endif /* -i <interface> */ @@ -876,7 +876,7 @@ void extcap_config() { ndpi_free(protos); #endif - ndpi_exit_detection_module(ndpi_info_mod); + ndpi_exit_detection_module(ndpi_str); extcap_exit = 1; } @@ -1347,10 +1347,10 @@ static void parseOptions(int argc, char **argv) { case '9': { - struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL); - extcap_packet_filter = ndpi_get_proto_by_name(ndpi_info_mod, optarg); + struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL); + extcap_packet_filter = ndpi_get_proto_by_name(ndpi_str, optarg); if(extcap_packet_filter == NDPI_PROTOCOL_UNKNOWN) extcap_packet_filter = atoi(optarg); - 
ndpi_exit_detection_module(ndpi_info_mod); + ndpi_exit_detection_module(ndpi_str); break; } @@ -1524,7 +1524,7 @@ void print_bin(FILE *fout, const char *label, struct ndpi_bin *b) { static void print_ndpi_address_port(FILE *out, const char *label, ndpi_address_port *ap) { if(ap->port != 0) { char buf[INET6_ADDRSTRLEN]; - + if(ap->is_ipv6) { inet_ntop(AF_INET6, &ap->address, buf, sizeof(buf)); fprintf(out, "[%s: [%s]:%u]", label, buf, ap->port); @@ -1892,7 +1892,7 @@ static void printFlow(u_int32_t id, struct ndpi_flow_info *flow, u_int16_t threa print_ndpi_address_port(out, "Relayed IP/Port", &flow->stun.relayed_address); print_ndpi_address_port(out, "Rsp Origin IP/Port", &flow->stun.response_origin); print_ndpi_address_port(out, "Other IP/Port", &flow->stun.other_address); - + if(flow->http.url[0] != '\0') { ndpi_risk_enum risk = ndpi_validate_url(flow->http.url); @@ -3883,7 +3883,7 @@ static void printResults(u_int64_t processing_time_usec, u_int64_t setup_time_us if(!quiet_mode) { printf("\nnDPI Memory statistics:\n"); printf("\tnDPI Memory (once): %-13s\n", formatBytes(ndpi_get_ndpi_detection_module_size(), buf, sizeof(buf))); - printf("\tFlow Memory (per flow): %-13s\n", formatBytes( ndpi_detection_get_sizeof_ndpi_flow_struct(), buf, sizeof(buf))); + printf("\tFlow Memory (per flow): %-13s\n", formatBytes(ndpi_detection_get_sizeof_ndpi_flow_struct(), buf, sizeof(buf))); printf("\tActual Memory: %-13s\n", formatBytes(current_ndpi_memory, buf, sizeof(buf))); printf("\tPeak Memory: %-13s\n", formatBytes(max_ndpi_memory, buf, sizeof(buf))); printf("\tSetup Time: %lu msec\n", (unsigned long)(setup_time_usec/1000)); @@ -5191,21 +5191,21 @@ void rsiUnitTest() { void hashUnitTest() { ndpi_str_hash *h; char * const dict[] = { "hello", "world", NULL }; - int i; + u_int16_t i; assert(ndpi_hash_init(&h) == 0); assert(h == NULL); for(i=0; dict[i] != NULL; i++) { u_int8_t l = strlen(dict[i]); - int * v; + u_int16_t v; - assert(ndpi_hash_add_entry(&h, dict[i], l, &i) == 0); 
- assert(ndpi_hash_find_entry(h, dict[i], l, (void **)&v) == 0); - assert(v == (void *)&i && *v == i); + assert(ndpi_hash_add_entry(&h, dict[i], l, i) == 0); + assert(ndpi_hash_find_entry(h, dict[i], l, &v) == 0); + assert(v == i); } - ndpi_hash_free(&h, NULL); + ndpi_hash_free(&h); assert(h == NULL); } @@ -5797,31 +5797,74 @@ void loadStressTest() { /* *********************************************** */ +void encodeDomainsUnitTest() { + NDPI_PROTOCOL_BITMASK all; + struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL); + const char *lists_path = "../lists/public_suffix_list.dat"; + struct stat st; + + if(stat(lists_path, &st) == 0) { + u_int16_t suffix_id; + char out[256]; + char *str; + ndpi_protocol_category_t id; + + NDPI_BITMASK_SET_ALL(all); + ndpi_set_protocol_detection_bitmask2(ndpi_str, &all); + + assert(ndpi_load_domain_suffixes(ndpi_str, (char*)lists_path) == 0); + + ndpi_get_host_domain_suffix(ndpi_str, "lcb.it", &suffix_id); assert(suffix_id == 1117); + ndpi_get_host_domain_suffix(ndpi_str, "www.ntop.org", &suffix_id); assert(suffix_id == 4503); + ndpi_get_host_domain_suffix(ndpi_str, "www.bbc.co.uk", &suffix_id); assert(suffix_id == 5242); + + str = (char*)"www.ntop.org"; assert(ndpi_encode_domain(ndpi_str, str, out, sizeof(out)) == 8); + str = (char*)"www.bbc.co.uk"; assert(ndpi_encode_domain(ndpi_str, str, out, sizeof(out)) == 8); + + assert(ndpi_load_categories_dir(ndpi_str, "../lists")); + assert(ndpi_load_categories_file(ndpi_str, "./categories.txt", "categories.txt")); + + str = (char*)"2001:db8:1::1"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100); + str = (char*)"www.internetbadguys.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100); + str = (char*)"0grand-casino.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107); + str = (char*)"222.0grand-casino.com"; 
assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107); + str = (char*)"10bet.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107); + str = (char*)"www.ntop.org"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == -1); assert(id == 0); + str = (char*)"www.andrewpope.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100); + } + + ndpi_exit_detection_module(ndpi_str); +} + +/* *********************************************** */ + void domainsUnitTest() { NDPI_PROTOCOL_BITMASK all; - struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL); + struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL); const char *lists_path = "../lists/public_suffix_list.dat"; struct stat st; if(stat(lists_path, &st) == 0) { + u_int16_t suffix_id; + NDPI_BITMASK_SET_ALL(all); - ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all); + ndpi_set_protocol_detection_bitmask2(ndpi_str, &all); - assert(ndpi_load_domain_suffixes(ndpi_info_mod, (char*)lists_path) == 0); + assert(ndpi_load_domain_suffixes(ndpi_str, (char*)lists_path) == 0); - assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.chosei.chiba.jp"), "chosei.chiba.jp") == 0); - assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.unipi.it"), "it") == 0); - assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "mail.apple.com"), "com") == 0); - assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.bbc.co.uk"), "co.uk") == 0); + assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.chosei.chiba.jp", &suffix_id), "chosei.chiba.jp") == 0); + assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.unipi.it", &suffix_id), "it") == 0); + assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "mail.apple.com", &suffix_id), "com") == 0); + assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, 
"www.bbc.co.uk", &suffix_id), "co.uk") == 0); - assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.chosei.chiba.jp"), "www.chosei.chiba.jp") == 0); - assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.unipi.it"), "unipi.it") == 0); - assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "mail.apple.com"), "apple.com") == 0); - assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.bbc.co.uk"), "bbc.co.uk") == 0); - assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0); + assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.chosei.chiba.jp"), "www.chosei.chiba.jp") == 0); + assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.unipi.it"), "unipi.it") == 0); + assert(strcmp(ndpi_get_host_domain(ndpi_str, "mail.apple.com"), "apple.com") == 0); + assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.bbc.co.uk"), "bbc.co.uk") == 0); + assert(strcmp(ndpi_get_host_domain(ndpi_str, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0); } - ndpi_exit_detection_module(ndpi_info_mod); + ndpi_exit_detection_module(ndpi_str); } /* *********************************************** */ @@ -5829,27 +5872,29 @@ void domainsUnitTest() { void domainSearchUnitTest() { ndpi_domain_classify *sc = ndpi_domain_classify_alloc(); char *domain = "ntop.org"; - u_int8_t class_id; + u_int16_t class_id; + struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL); + assert(ndpi_str); assert(sc); - ndpi_domain_classify_add(sc, NDPI_PROTOCOL_NTOP, ".ntop.org"); - ndpi_domain_classify_add(sc, NDPI_PROTOCOL_NTOP, domain); - assert(ndpi_domain_classify_contains(sc, &class_id, domain)); + ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_NTOP, ".ntop.org"); + ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_NTOP, domain); + assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, domain)); - ndpi_domain_classify_add(sc, NDPI_PROTOCOL_CATEGORY_GAMBLING, "123vc.club"); - 
assert(ndpi_domain_classify_contains(sc, &class_id, "123vc.club")); + ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_CATEGORY_GAMBLING, "123vc.club"); + assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, "123vc.club")); assert(class_id == NDPI_PROTOCOL_CATEGORY_GAMBLING); /* Subdomain check */ - assert(ndpi_domain_classify_contains(sc, &class_id, "blog.ntop.org")); + assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, "blog.ntop.org")); assert(class_id == NDPI_PROTOCOL_NTOP); #ifdef DEBUG_TRACE struct stat st; if(stat(fname, &st) == 0) { - u_int32_t s = ndpi_domain_classify_size(sc); + u_int32_t s = ndpi_domain_classify_size(ndpi_str, sc); printf("Size: %u [%.1f %% of the original filename size]\n", s, (float)(s * 100) / (float)st.st_size); @@ -5857,20 +5902,26 @@ void domainSearchUnitTest() { #endif ndpi_domain_classify_free(sc); + ndpi_exit_detection_module(ndpi_str); } /* *********************************************** */ void domainSearchUnitTest2() { + struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL); ndpi_domain_classify *c = ndpi_domain_classify_alloc(); - u_int8_t class_id = 9; + u_int16_t class_id = 9; - ndpi_domain_classify_add(c, class_id, "ntop.org"); - ndpi_domain_classify_add(c, class_id, "apple.com"); + assert(ndpi_str); + assert(c); - assert(!ndpi_domain_classify_contains(c, &class_id, "ntop.com")); + ndpi_domain_classify_add(ndpi_str, c, class_id, "ntop.org"); + ndpi_domain_classify_add(ndpi_str, c, class_id, "apple.com"); + + assert(!ndpi_domain_classify_hostname(ndpi_str, c, &class_id, "ntop.com")); ndpi_domain_classify_free(c); + ndpi_exit_detection_module(ndpi_str); } /* *********************************************** */ @@ -5913,6 +5964,7 @@ int main(int argc, char **argv) { exit(0); #endif + encodeDomainsUnitTest(); loadStressTest(); domainsUnitTest(); outlierUnitTest(); |