diff options
Diffstat (limited to 'src/lib/ndpi_domain_classify.c')
-rw-r--r-- | src/lib/ndpi_domain_classify.c | 222 |
1 files changed, 62 insertions, 160 deletions
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c index fce10d072..f62800527 100644 --- a/src/lib/ndpi_domain_classify.c +++ b/src/lib/ndpi_domain_classify.c @@ -1,7 +1,7 @@ /* * ndpi_domain_classify.c * - * Copyright (C) 2011-23 - ntop.org and contributors + * Copyright (C) 2011-24 - ntop.org and contributors * * nDPI is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -27,40 +27,31 @@ #include "ndpi_config.h" #include "ndpi_api.h" -#if 0 -#define DEBUG_ADD -#define DEBUG_CONTAINS -#endif +#define ENCODE_DATA /* ********************************************************** */ ndpi_domain_classify* ndpi_domain_classify_alloc() { - int i; - ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify)); + ndpi_domain_classify *s = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify)); - if(!cat) + if(!s) return NULL; - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) - cat->classes[i].class_id = 0, cat->classes[i].domains = NULL; + if(ndpi_hash_init(&s->domains) != 0) { + ndpi_free(s); + return(NULL); + } - return((ndpi_domain_classify*)cat); + return((ndpi_domain_classify*)s); } /* ********************************************************** */ void ndpi_domain_classify_free(ndpi_domain_classify *s) { - u_int32_t i; - if(!s) return; - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->classes[i].domains != NULL) { - ndpi_bitmap64_fuse_free(s->classes[i].domains); - } else - break; - } + ndpi_hash_free(&s->domains); ndpi_free(s); } @@ -68,28 +59,26 @@ void ndpi_domain_classify_free(ndpi_domain_classify *s) { /* ********************************************************** */ u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s) { - u_int32_t i, tot_len = sizeof(ndpi_domain_classify); + u_int32_t tot_len = sizeof(ndpi_domain_classify); if(!s) return(0); - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->classes[i].domains != NULL) { - tot_len += ndpi_bitmap64_fuse_size(s->classes[i].domains); - } else - break; - } + /* TODO */ return(tot_len); } /* ********************************************************** */ -bool ndpi_domain_classify_add(ndpi_domain_classify *s, - u_int8_t class_id, - const char *domain) { - u_int32_t i; - u_int64_t hash; +bool ndpi_domain_classify_add(struct ndpi_detection_module_struct *ndpi_str, + ndpi_domain_classify *s, + u_int16_t class_id, + char *domain) { +#ifdef ENCODE_DATA + u_int32_t out_len; + char out[256]; +#endif if((!s) || (!domain)) return(false); @@ -97,43 +86,27 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, /* Skip initial string . in domain names */ while(domain[0] == '.') domain++; -#if 0 - char *dot = strrchr(domain, '.'); + //printf("%s\n", domain); + // fprintf(stdout, "."); fflush(stdout); - if(dot) { - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) - return(false); - } -#endif +#ifdef ENCODE_DATA + out_len = ndpi_encode_domain(ndpi_str, domain, out, sizeof(out)); - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->classes[i].class_id == class_id) { - break; - } else if(s->classes[i].class_id == 0) { - s->classes[i].class_id = class_id; - s->classes[i].domains = ndpi_bitmap64_fuse_alloc(); - - if(!s->classes[i].domains) - s->classes[i].class_id = 0; - - break; - } - } - - if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS) - return(false); - - hash = ndpi_quick_hash64(domain, strlen(domain)); + ndpi_hash_add_entry(&s->domains, out, out_len, class_id); +#else + ndpi_hash_add_entry(&s->domains, domain, strlen(domain), class_id); +#endif - return(ndpi_bitmap64_fuse_set(s->classes[i].domains, hash)); + return(true); } /* ********************************************************** */ -u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, - u_int8_t class_id, +u_int32_t ndpi_domain_classify_add_domains(struct ndpi_detection_module_struct *ndpi_mod, + ndpi_domain_classify *s, + u_int16_t class_id, char *file_path) { - u_int32_t i, num_added = 0; + u_int32_t num_added = 0; char buf[256]; FILE *fd; char *line; @@ -141,30 +114,12 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, if((!s) || (!file_path)) return(false); - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->classes[i].class_id == class_id) { - break; - } else if(s->classes[i].class_id == 0) { - s->classes[i].class_id = class_id; - s->classes[i].domains = ndpi_bitmap64_fuse_alloc(); - if(!s->classes[i].domains) - s->classes[i].class_id = 0; - break; - } - } - - if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS) - return(false); - - /* *************************************** */ - fd = fopen(file_path, "r"); if(fd == NULL) return(false); while((line = fgets(buf, sizeof(buf), fd)) != NULL) { u_int len; - u_int64_t hash; if((line[0] == '#') || (line[0] == '\0')) continue; @@ -177,9 +132,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, line[len] = '\0'; } - hash = ndpi_quick_hash64(line, strlen(line)); - - if(ndpi_bitmap64_fuse_set(s->classes[i].domains, hash)) + if(ndpi_domain_classify_add(ndpi_mod, s, class_id, line)) num_added++; } @@ -191,104 +144,53 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, /* ********************************************************** */ bool ndpi_domain_classify_finalize(ndpi_domain_classify *s) { - u_int32_t i; - if(!s) return(false); - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->classes[i].class_id != 0) { - ndpi_bitmap64_fuse_compress(s->classes[i].domains); - } - } return(true); } /* ********************************************************** */ -static bool is_valid_domain_char(u_char c) { - if(((c >= 'A') && (c <= 'Z')) - || ((c >= 'a') && (c <= 'z')) - || ((c >= '0') && (c <= '9')) - || (c == '_') - || (c == '-') - || (c == '.')) - return(true); - else - return(false); -} - -/* ********************************************************** */ +bool ndpi_domain_classify_hostname(struct ndpi_detection_module_struct *ndpi_mod, + ndpi_domain_classify *s, + u_int16_t *class_id /* out */, + char *hostname) { + u_int32_t len; + const char *dot; + char *item; -const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, - u_int8_t *class_id /* out */, - const char *hostname, - bool return_subprefix) { - u_int32_t i, len; - const char *dot, *elem, *prev_elem; + // ndpi_enable_loaded_categories(ndpi_mod); /* Make sure they have been enabled */ *class_id = 0; /* Unknown class_id */ - if(!hostname || !s) return(hostname); - if((len = strlen(hostname)) == 0) return(hostname); - if((dot = strrchr(hostname, '.')) == NULL) return(hostname); - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(hostname); + if(!hostname || !s) return(false); + if((len = strlen(hostname)) == 0) return(false); + if((dot = strrchr(hostname, '.')) == NULL) return(false); + if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); - /* This is a number or a numeric IP or similar */ - if(ndpi_isdigit(hostname[len-1]) && isdigit(hostname[0])) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", hostname); -#endif + item = hostname; - return(hostname); - } + while(true) { + char *next; - if(!is_valid_domain_char(hostname[0])) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", hostname); -#endif + /* This looks like a match so let's check the hash now */ +#ifdef ENCODE_DATA + char out[256]; + u_int32_t out_len = ndpi_encode_domain(ndpi_mod, item, out, sizeof(out)); - return(hostname); - } - - elem = prev_elem = hostname; - - while(elem != NULL) { - u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem)); - - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->classes[i].class_id != 0) { - if(ndpi_bitmap64_fuse_isset(s->classes[i].domains, hash)) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s = %d [%llu]\n", - hostname, s->classes[i].class_id, hash); + if(ndpi_hash_find_entry(s->domains, out, out_len, class_id) == 0) + return(true); +#else + if(ndpi_hash_find_entry(s->domains, item, strlen(item), class_id) == 0) + return(true); #endif - *class_id = s->classes[i].class_id; - return(return_subprefix ? prev_elem : elem); - } - } else - break; - } - - prev_elem = elem; - elem = strchr(elem, '.'); - if(elem == NULL) break; - // if(elem == dot) break; + next = strchr(item, '.'); - elem = &elem[1]; - } /* while */ + if(!next) break; else item = &next[1]; + } /* Not found */ - return(hostname); -} - -/* ********************************************************** */ - -bool ndpi_domain_classify_contains(ndpi_domain_classify *s, - u_int8_t *class_id /* out */, - const char *domain) { - (void)ndpi_domain_classify_longest_prefix(s, class_id, domain, false); /* UNUSED */ - - return((*class_id == 0) ? false : true); + return(false); } |