aboutsummaryrefslogtreecommitdiff
path: root/example/ndpiReader.c
diff options
context:
space:
mode:
author: Luca Deri <lucaderi@users.noreply.github.com> 2024-04-18 23:21:40 +0200
committer: GitHub <noreply@github.com> 2024-04-18 23:21:40 +0200
commit: ad117bfaabd3bc75dc70d0ddbc4ba18c86c40dbd (patch)
tree: 3b1fb6016da1e114bca190ed6a868421fd9c32f1 /example/ndpiReader.c
parent: 108b8331d5b345e110c9ef110a6aa95a2767a640 (diff)
Domain Classification Improvements (#2396)
* Added size_t ndpi_compress_str(const char * in, size_t len, char * out, size_t bufsize); and size_t ndpi_decompress_str(const char * in, size_t len, char * out, size_t bufsize); which are used to compress short strings such as domain names. This code is based on https://github.com/Ed-von-Schleck/shoco
* Major code rewrite for ndpi_hash and ndpi_domain_classify
* Improvements to make sure custom categories are loaded and enabled
* Fixed string encoding
* Extended SalesForce/Cloudflare domains list
Diffstat (limited to 'example/ndpiReader.c')
-rw-r--r-- example/ndpiReader.c 160
1 files changed, 106 insertions, 54 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 2621d0465..73d469952 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -1,7 +1,7 @@
/*
* ndpiReader.c
*
- * Copyright (C) 2011-23 - ntop.org
+ * Copyright (C) 2011-24 - ntop.org
*
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
@@ -377,21 +377,21 @@ void ndpiCheckHostStringMatch(char *testChar) {
testChar, strlen(testChar), &match);
if(testRes) {
- memset( &detected_protocol, 0, sizeof(ndpi_protocol) );
+ memset(&detected_protocol, 0, sizeof(ndpi_protocol) );
detected_protocol.app_protocol = match.protocol_id;
detected_protocol.master_protocol = 0;
detected_protocol.category = match.protocol_category;
- ndpi_protocol2name( ndpi_str, detected_protocol, appBufStr,
- sizeof(appBufStr));
+ ndpi_protocol2name(ndpi_str, detected_protocol, appBufStr,
+ sizeof(appBufStr));
printf("Match Found for string [%s] -> P(%d) B(%d) C(%d) => %s %s %s\n",
testChar, match.protocol_id, match.protocol_breed,
match.protocol_category,
appBufStr,
- ndpi_get_proto_breed_name( match.protocol_breed ),
- ndpi_category_get_name( ndpi_str, match.protocol_category));
+ ndpi_get_proto_breed_name(match.protocol_breed ),
+ ndpi_category_get_name(ndpi_str, match.protocol_category));
} else
printf("Match NOT Found for string: %s\n\n", testChar );
@@ -675,17 +675,17 @@ static void help(u_int long_help) {
max_num_reported_top_payloads, max_num_tcp_dissected_pkts, max_num_udp_dissected_pkts);
NDPI_PROTOCOL_BITMASK all;
- struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
+ struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
NDPI_BITMASK_SET_ALL(all);
- ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all);
+ ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);
if(_protoFilePath != NULL)
- ndpi_load_protocols_file(ndpi_info_mod, _protoFilePath);
+ ndpi_load_protocols_file(ndpi_str, _protoFilePath);
- ndpi_finalize_initialization(ndpi_info_mod);
+ ndpi_finalize_initialization(ndpi_str);
printf("\nProtocols configuration parameters:\n");
- ndpi_dump_config(ndpi_info_mod, stdout);
+ ndpi_dump_config(ndpi_str, stdout);
#ifndef WIN32
printf("\nExcap (wireshark) options:\n"
@@ -712,13 +712,13 @@ static void help(u_int long_help) {
"Id", "Userd-id", "Protocol", "Layer_4", "Nw_Proto", "Breed", "Category");
num_threads = 1;
- ndpi_dump_protocols(ndpi_info_mod, stdout);
+ ndpi_dump_protocols(ndpi_str, stdout);
printf("\n\nnDPI supported risks:\n");
ndpi_dump_risks_score(stdout);
}
- ndpi_exit_detection_module(ndpi_info_mod);
+ ndpi_exit_detection_module(ndpi_str);
exit(!long_help);
}
@@ -839,10 +839,10 @@ void extcap_config() {
ndpi_proto_defaults_t *proto_defaults;
#endif
- struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
+ struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
#if 0
- ndpi_num_supported_protocols = ndpi_get_ndpi_num_supported_protocols(ndpi_info_mod);
- proto_defaults = ndpi_get_proto_defaults(ndpi_info_mod);
+ ndpi_num_supported_protocols = ndpi_get_ndpi_num_supported_protocols(ndpi_str);
+ proto_defaults = ndpi_get_proto_defaults(ndpi_str);
#endif
/* -i <interface> */
@@ -876,7 +876,7 @@ void extcap_config() {
ndpi_free(protos);
#endif
- ndpi_exit_detection_module(ndpi_info_mod);
+ ndpi_exit_detection_module(ndpi_str);
extcap_exit = 1;
}
@@ -1347,10 +1347,10 @@ static void parseOptions(int argc, char **argv) {
case '9':
{
- struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
- extcap_packet_filter = ndpi_get_proto_by_name(ndpi_info_mod, optarg);
+ struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
+ extcap_packet_filter = ndpi_get_proto_by_name(ndpi_str, optarg);
if(extcap_packet_filter == NDPI_PROTOCOL_UNKNOWN) extcap_packet_filter = atoi(optarg);
- ndpi_exit_detection_module(ndpi_info_mod);
+ ndpi_exit_detection_module(ndpi_str);
break;
}
@@ -1524,7 +1524,7 @@ void print_bin(FILE *fout, const char *label, struct ndpi_bin *b) {
static void print_ndpi_address_port(FILE *out, const char *label, ndpi_address_port *ap) {
if(ap->port != 0) {
char buf[INET6_ADDRSTRLEN];
-
+
if(ap->is_ipv6) {
inet_ntop(AF_INET6, &ap->address, buf, sizeof(buf));
fprintf(out, "[%s: [%s]:%u]", label, buf, ap->port);
@@ -1892,7 +1892,7 @@ static void printFlow(u_int32_t id, struct ndpi_flow_info *flow, u_int16_t threa
print_ndpi_address_port(out, "Relayed IP/Port", &flow->stun.relayed_address);
print_ndpi_address_port(out, "Rsp Origin IP/Port", &flow->stun.response_origin);
print_ndpi_address_port(out, "Other IP/Port", &flow->stun.other_address);
-
+
if(flow->http.url[0] != '\0') {
ndpi_risk_enum risk = ndpi_validate_url(flow->http.url);
@@ -3883,7 +3883,7 @@ static void printResults(u_int64_t processing_time_usec, u_int64_t setup_time_us
if(!quiet_mode) {
printf("\nnDPI Memory statistics:\n");
printf("\tnDPI Memory (once): %-13s\n", formatBytes(ndpi_get_ndpi_detection_module_size(), buf, sizeof(buf)));
- printf("\tFlow Memory (per flow): %-13s\n", formatBytes( ndpi_detection_get_sizeof_ndpi_flow_struct(), buf, sizeof(buf)));
+ printf("\tFlow Memory (per flow): %-13s\n", formatBytes(ndpi_detection_get_sizeof_ndpi_flow_struct(), buf, sizeof(buf)));
printf("\tActual Memory: %-13s\n", formatBytes(current_ndpi_memory, buf, sizeof(buf)));
printf("\tPeak Memory: %-13s\n", formatBytes(max_ndpi_memory, buf, sizeof(buf)));
printf("\tSetup Time: %lu msec\n", (unsigned long)(setup_time_usec/1000));
@@ -5191,21 +5191,21 @@ void rsiUnitTest() {
void hashUnitTest() {
ndpi_str_hash *h;
char * const dict[] = { "hello", "world", NULL };
- int i;
+ u_int16_t i;
assert(ndpi_hash_init(&h) == 0);
assert(h == NULL);
for(i=0; dict[i] != NULL; i++) {
u_int8_t l = strlen(dict[i]);
- int * v;
+ u_int16_t v;
- assert(ndpi_hash_add_entry(&h, dict[i], l, &i) == 0);
- assert(ndpi_hash_find_entry(h, dict[i], l, (void **)&v) == 0);
- assert(v == (void *)&i && *v == i);
+ assert(ndpi_hash_add_entry(&h, dict[i], l, i) == 0);
+ assert(ndpi_hash_find_entry(h, dict[i], l, &v) == 0);
+ assert(v == i);
}
- ndpi_hash_free(&h, NULL);
+ ndpi_hash_free(&h);
assert(h == NULL);
}
@@ -5797,31 +5797,74 @@ void loadStressTest() {
/* *********************************************** */
+void encodeDomainsUnitTest() {
+ NDPI_PROTOCOL_BITMASK all;
+ struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
+ const char *lists_path = "../lists/public_suffix_list.dat";
+ struct stat st;
+
+ if(stat(lists_path, &st) == 0) {
+ u_int16_t suffix_id;
+ char out[256];
+ char *str;
+ ndpi_protocol_category_t id;
+
+ NDPI_BITMASK_SET_ALL(all);
+ ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);
+
+ assert(ndpi_load_domain_suffixes(ndpi_str, (char*)lists_path) == 0);
+
+ ndpi_get_host_domain_suffix(ndpi_str, "lcb.it", &suffix_id); assert(suffix_id == 1117);
+ ndpi_get_host_domain_suffix(ndpi_str, "www.ntop.org", &suffix_id); assert(suffix_id == 4503);
+ ndpi_get_host_domain_suffix(ndpi_str, "www.bbc.co.uk", &suffix_id); assert(suffix_id == 5242);
+
+ str = (char*)"www.ntop.org"; assert(ndpi_encode_domain(ndpi_str, str, out, sizeof(out)) == 8);
+ str = (char*)"www.bbc.co.uk"; assert(ndpi_encode_domain(ndpi_str, str, out, sizeof(out)) == 8);
+
+ assert(ndpi_load_categories_dir(ndpi_str, "../lists"));
+ assert(ndpi_load_categories_file(ndpi_str, "./categories.txt", "categories.txt"));
+
+ str = (char*)"2001:db8:1::1"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100);
+ str = (char*)"www.internetbadguys.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100);
+ str = (char*)"0grand-casino.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107);
+ str = (char*)"222.0grand-casino.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107);
+ str = (char*)"10bet.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107);
+ str = (char*)"www.ntop.org"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == -1); assert(id == 0);
+ str = (char*)"www.andrewpope.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100);
+ }
+
+ ndpi_exit_detection_module(ndpi_str);
+}
+
+/* *********************************************** */
+
void domainsUnitTest() {
NDPI_PROTOCOL_BITMASK all;
- struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
+ struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
const char *lists_path = "../lists/public_suffix_list.dat";
struct stat st;
if(stat(lists_path, &st) == 0) {
+ u_int16_t suffix_id;
+
NDPI_BITMASK_SET_ALL(all);
- ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all);
+ ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);
- assert(ndpi_load_domain_suffixes(ndpi_info_mod, (char*)lists_path) == 0);
+ assert(ndpi_load_domain_suffixes(ndpi_str, (char*)lists_path) == 0);
- assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.chosei.chiba.jp"), "chosei.chiba.jp") == 0);
- assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.unipi.it"), "it") == 0);
- assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "mail.apple.com"), "com") == 0);
- assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.bbc.co.uk"), "co.uk") == 0);
+ assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.chosei.chiba.jp", &suffix_id), "chosei.chiba.jp") == 0);
+ assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.unipi.it", &suffix_id), "it") == 0);
+ assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "mail.apple.com", &suffix_id), "com") == 0);
+ assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.bbc.co.uk", &suffix_id), "co.uk") == 0);
- assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.chosei.chiba.jp"), "www.chosei.chiba.jp") == 0);
- assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.unipi.it"), "unipi.it") == 0);
- assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "mail.apple.com"), "apple.com") == 0);
- assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.bbc.co.uk"), "bbc.co.uk") == 0);
- assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0);
+ assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.chosei.chiba.jp"), "www.chosei.chiba.jp") == 0);
+ assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.unipi.it"), "unipi.it") == 0);
+ assert(strcmp(ndpi_get_host_domain(ndpi_str, "mail.apple.com"), "apple.com") == 0);
+ assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.bbc.co.uk"), "bbc.co.uk") == 0);
+ assert(strcmp(ndpi_get_host_domain(ndpi_str, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0);
}
- ndpi_exit_detection_module(ndpi_info_mod);
+ ndpi_exit_detection_module(ndpi_str);
}
/* *********************************************** */
@@ -5829,27 +5872,29 @@ void domainsUnitTest() {
void domainSearchUnitTest() {
ndpi_domain_classify *sc = ndpi_domain_classify_alloc();
char *domain = "ntop.org";
- u_int8_t class_id;
+ u_int16_t class_id;
+ struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
+ assert(ndpi_str);
assert(sc);
- ndpi_domain_classify_add(sc, NDPI_PROTOCOL_NTOP, ".ntop.org");
- ndpi_domain_classify_add(sc, NDPI_PROTOCOL_NTOP, domain);
- assert(ndpi_domain_classify_contains(sc, &class_id, domain));
+ ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_NTOP, ".ntop.org");
+ ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_NTOP, domain);
+ assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, domain));
- ndpi_domain_classify_add(sc, NDPI_PROTOCOL_CATEGORY_GAMBLING, "123vc.club");
- assert(ndpi_domain_classify_contains(sc, &class_id, "123vc.club"));
+ ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_CATEGORY_GAMBLING, "123vc.club");
+ assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, "123vc.club"));
assert(class_id == NDPI_PROTOCOL_CATEGORY_GAMBLING);
/* Subdomain check */
- assert(ndpi_domain_classify_contains(sc, &class_id, "blog.ntop.org"));
+ assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, "blog.ntop.org"));
assert(class_id == NDPI_PROTOCOL_NTOP);
#ifdef DEBUG_TRACE
struct stat st;
if(stat(fname, &st) == 0) {
- u_int32_t s = ndpi_domain_classify_size(sc);
+ u_int32_t s = ndpi_domain_classify_size(ndpi_str, sc);
printf("Size: %u [%.1f %% of the original filename size]\n",
s, (float)(s * 100) / (float)st.st_size);
@@ -5857,20 +5902,26 @@ void domainSearchUnitTest() {
#endif
ndpi_domain_classify_free(sc);
+ ndpi_exit_detection_module(ndpi_str);
}
/* *********************************************** */
void domainSearchUnitTest2() {
+ struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
ndpi_domain_classify *c = ndpi_domain_classify_alloc();
- u_int8_t class_id = 9;
+ u_int16_t class_id = 9;
- ndpi_domain_classify_add(c, class_id, "ntop.org");
- ndpi_domain_classify_add(c, class_id, "apple.com");
+ assert(ndpi_str);
+ assert(c);
- assert(!ndpi_domain_classify_contains(c, &class_id, "ntop.com"));
+ ndpi_domain_classify_add(ndpi_str, c, class_id, "ntop.org");
+ ndpi_domain_classify_add(ndpi_str, c, class_id, "apple.com");
+
+ assert(!ndpi_domain_classify_hostname(ndpi_str, c, &class_id, "ntop.com"));
ndpi_domain_classify_free(c);
+ ndpi_exit_detection_module(ndpi_str);
}
/* *********************************************** */
@@ -5913,6 +5964,7 @@ int main(int argc, char **argv) {
exit(0);
#endif
+ encodeDomainsUnitTest();
loadStressTest();
domainsUnitTest();
outlierUnitTest();