diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h | 35 | ||||
-rw-r--r-- | src/include/ndpi_private.h | 12 | ||||
-rw-r--r-- | src/lib/ndpi_domain_classify.c | 74 | ||||
-rw-r--r-- | src/lib/ndpi_domains.c | 93 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 9 |
5 files changed, 185 insertions, 38 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index b20305e33..f4c2f6114 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -2147,6 +2147,9 @@ extern "C" { u_int8_t class_id, char *file_path); bool ndpi_domain_classify_finalize(ndpi_domain_classify *s); + const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, + u_int8_t *class_id /* out */, + const char *hostnname); bool ndpi_domain_classify_contains(ndpi_domain_classify *s, u_int8_t *class_id /* out */, const char *domain); @@ -2188,6 +2191,8 @@ extern "C" { /** * Get user data which was previously set with `ndpi_set_user_data()`. * + * @par ndpi_str = the struct created for the protocol detection + * * @return the user data pointer * */ @@ -2195,6 +2200,36 @@ extern "C" { /* ******************************* */ + /** + * Loads the domain suffixes from the specified path. You need to + * perform this action once + * + * @par ndpi_str = the struct created for the protocol detection + * @par public_suffix_list_path = path of the public suffix list file + * + * @return 0 = no error, a negative value otherwise + * + */ + int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str, + char *public_suffix_list_path); + + /** + * Returns the domain suffix out of the specified hostname. + * The returned pointer is an offset of the original hostname. + * Note that you need to call ndpi_load_domain_suffixes() before + * calling this function. + * + * @par ndpi_str = the struct created for the protocol detection + * @par hostname = the hostname from which the domain name has to be extracted + * + * @return The host domain name or the host itself if not found. 
+ * + */ + const char* ndpi_get_host_domain_suffix(struct ndpi_detection_module_struct *ndpi_str, + const char *hostname); + + /* ******************************* */ + /* Can't call libc functions from kernel space, define some stub instead */ #define ndpi_isalpha(ch) (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z')) diff --git a/src/include/ndpi_private.h b/src/include/ndpi_private.h index d0adfa362..df3bfaf2c 100644 --- a/src/include/ndpi_private.h +++ b/src/include/ndpi_private.h @@ -152,7 +152,7 @@ struct ndpi_detection_module_struct { u_int16_t num_tls_blocks_to_follow; u_int8_t skip_tls_blocks_until_change_cipher:1, _notused:7; u_int8_t tls_certificate_expire_in_x_days; - + void *user_data; char custom_category_labels[NUM_CUSTOM_CATEGORIES][CUSTOM_CATEGORY_LABEL_LEN]; @@ -206,11 +206,11 @@ struct ndpi_detection_module_struct { /* Patricia trees */ ndpi_patricia_tree_t *ip_risk_mask_ptree; ndpi_patricia_tree_t *ip_risk_mask_ptree6; - ndpi_patricia_tree_t *ip_risk_ptree; + ndpi_patricia_tree_t *ip_risk_ptree; ndpi_patricia_tree_t *ip_risk_ptree6; ndpi_patricia_tree_t *protocols_ptree; /* IP-based protocol detection */ ndpi_patricia_tree_t *protocols_ptree6; - + /* *** If you add a new Patricia tree, please update ptree_type above! 
*** */ struct { @@ -256,7 +256,7 @@ struct ndpi_detection_module_struct { struct ndpi_lru_cache *tls_cert_cache; u_int32_t tls_cert_cache_num_entries; int32_t tls_cert_cache_ttl; - + /* NDPI_PROTOCOL_MINING and subprotocols */ struct ndpi_lru_cache *mining_cache; u_int32_t mining_cache_num_entries; @@ -302,7 +302,9 @@ struct ndpi_detection_module_struct { nbpf_filter nbpf_custom_proto[MAX_NBPF_CUSTOM_PROTO]; #endif - u_int16_t max_payload_track_len; + u_int16_t max_payload_track_len; + + ndpi_domain_classify *public_domain_suffixes; }; diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c index 2ca071ca0..382023947 100644 --- a/src/lib/ndpi_domain_classify.c +++ b/src/lib/ndpi_domain_classify.c @@ -89,7 +89,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, u_int32_t i; char *dot; - if(!s || !domain) + if((!s) || (!domain)) return(false); /* Skip initial string . in domain names */ @@ -97,18 +97,21 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, dot = strrchr(domain, '.'); - if(!dot) return(false); - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) - return(false); - + if(dot) { + if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) + return(false); + } + for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { if(s->classes[i].class_id == class_id) { break; } else if(s->classes[i].class_id == 0) { s->classes[i].class_id = class_id; s->classes[i].domains = ndpi_bitmap64_alloc(); + if(!s->classes[i].domains) s->classes[i].class_id = 0; + break; } } @@ -130,7 +133,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, FILE *fd; char *line; - if(!s || !file_path) + if((!s) || (!file_path)) return(false); for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { @@ -199,9 +202,9 @@ bool ndpi_domain_classify_finalize(ndpi_domain_classify *s) { /* ********************************************************** */ static bool is_valid_domain_char(u_char c) { - if(((c >= 'A')&& (c <= 'Z')) - || ((c >= 'a')&& (c <= 
'z')) - || ((c >= '0')&& (c <= '9')) + if(((c >= 'A') && (c <= 'Z')) + || ((c >= 'a') && (c <= 'z')) + || ((c >= '0') && (c <= '9')) || (c == '_') || (c == '-') || (c == '.')) @@ -212,35 +215,37 @@ static bool is_valid_domain_char(u_char c) { /* ********************************************************** */ -bool ndpi_domain_classify_contains(ndpi_domain_classify *s, - u_int8_t *class_id /* out */, - const char *domain) { +const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, + u_int8_t *class_id /* out */, + const char *hostname) { u_int32_t i, len; const char *dot, *elem; - if(!domain || !s) return(false); - if((len = strlen(domain)) == 0) return(false); - if((dot = strrchr(domain, '.')) == NULL) return(false); - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); + *class_id = 0; /* Unknown class_id */ + + if(!hostname || !s) return(hostname); + if((len = strlen(hostname)) == 0) return(hostname); + if((dot = strrchr(hostname, '.')) == NULL) return(hostname); + if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(hostname); /* This is a number or a numeric IP or similar */ - if(ndpi_isdigit(domain[len-1]) && isdigit(domain[0])) { + if(ndpi_isdigit(hostname[len-1]) && isdigit(hostname[0])) { #ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", domain); + printf("[contains] %s INVALID\n", hostname); #endif - return(false); + return(hostname); } - if(!is_valid_domain_char(domain[0])) { + if(!is_valid_domain_char(hostname[0])) { #ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", domain); + printf("[contains] %s INVALID\n", hostname); #endif - return(false); + return(hostname); } - elem = domain; + elem = hostname; while(elem != NULL) { u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem)); @@ -249,10 +254,10 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *s, if(s->classes[i].class_id != 0) { if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) { #ifdef DEBUG_CONTAINS - printf("[contains] %s = 
%d\n", domain, s->classes[i].class_id); + printf("[contains] %s = %d\n", hostname, s->classes[i].class_id); #endif *class_id = s->classes[i].class_id; - return(true); + return(elem); } } else break; @@ -260,16 +265,23 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *s, elem = strchr(elem, '.'); - if((elem == NULL) || (elem == dot)) + if((elem == NULL) /* || (elem == dot) */) break; else elem = &elem[1]; } /* while */ - -#ifdef DEBUG_CONTAINS - printf("[contains] %s NOT FOUND\n", domain); -#endif - return(false); + /* Not found */ + return(hostname); +} + +/* ********************************************************** */ + +bool ndpi_domain_classify_contains(ndpi_domain_classify *s, + u_int8_t *class_id /* out */, + const char *domain) { + (void)ndpi_domain_classify_longest_prefix(s, class_id, domain); /* UNUSED */ + + return((*class_id == 0) ? false : true); } diff --git a/src/lib/ndpi_domains.c b/src/lib/ndpi_domains.c new file mode 100644 index 000000000..12f735b36 --- /dev/null +++ b/src/lib/ndpi_domains.c @@ -0,0 +1,93 @@ +/* + * ndpi_domains.c + * + * Copyright (C) 2011-24 - ntop.org and contributors + * + * nDPI is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * nDPI is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with nDPI. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#include "ndpi_config.h" +#include "ndpi_api.h" +#include "ndpi_includes.h" +#include "ndpi_private.h" + +/* ******************************* */ + +int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str, + char *public_suffix_list_path) { + char buf[256], *line; + FILE *fd; + bool do_trace = false; + u_int num_domains = 0; + + if(public_suffix_list_path == NULL) + return(-1); + + if((fd = fopen(public_suffix_list_path, "r")) == NULL) + return(-2); + + if(ndpi_str->public_domain_suffixes != NULL) { + /* An existing suffix list was already loaded: free it and start over */ + ndpi_domain_classify_free(ndpi_str->public_domain_suffixes); + } + + if((ndpi_str->public_domain_suffixes = ndpi_domain_classify_alloc()) == NULL) + return(-3); + + while((line = fgets(buf, sizeof(buf), fd)) != NULL) { + u_int offset, len; + + /* Skip empty lines or comments */ + if((line[0] == '\0') || (line[0] == '/') || (line[0] == '\n') || (line[0] == '\r')) + continue; + + if((line[0] == '*') && (line[1] == '.') && (line[2] != '\0')) + offset = 2; + else + offset = 0; + + len = strlen(line) - 1; + while((len > 0) && (line[len] == '\n')) + line[len--] = '\0'; + + if(!ndpi_domain_classify_add(ndpi_str->public_domain_suffixes, + 1 /* dummy */, &line[offset])) { + if(do_trace) printf("Error while processing domain %s\n", &line[offset]); + } else + num_domains++; + } + + if(!ndpi_domain_classify_finalize(ndpi_str->public_domain_suffixes)) { + if(do_trace) printf("Error while finalizing domain processing\n"); + } + + if(do_trace) printf("Loaded %u domains\n", num_domains); + + return(0); +} + +/* ******************************* */ + +const char* ndpi_get_host_domain_suffix(struct ndpi_detection_module_struct *ndpi_str, + const char *hostname) { + if(ndpi_str->public_domain_suffixes == NULL) + return(hostname); + else { + u_int8_t class_id; + + return(ndpi_domain_classify_longest_prefix(ndpi_str->public_domain_suffixes, + &class_id, hostname)); + } +} diff --git 
a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index d870ffe5c..28c53714f 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -4000,9 +4000,14 @@ void ndpi_exit_detection_module(struct ndpi_detection_module_struct *ndpi_str) { ndpi_free_geoip(ndpi_str); if(ndpi_str->callback_buffer) - ndpi_free(ndpi_str->callback_buffer); + ndpi_free(ndpi_str->callback_buffer); + if(ndpi_str->callback_buffer_tcp_payload) - ndpi_free(ndpi_str->callback_buffer_tcp_payload); + ndpi_free(ndpi_str->callback_buffer_tcp_payload); + + if(ndpi_str->public_domain_suffixes) + ndpi_domain_classify_free(ndpi_str->public_domain_suffixes); + ndpi_free(ndpi_str); } |