diff options
author | Luca Deri <deri@ntop.org> | 2024-01-27 20:39:47 +0100 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2024-01-27 20:40:27 +0100 |
commit | 65b9c68d7d92f615e2df5df0bb687dd7cfd7ac56 (patch) | |
tree | e81ce59a60fd64079423859503a5bb421d9cb020 /src | |
parent | bcca89b78cdbc09d73987e5bdf728299f776c7a3 (diff) |
Fixed loading of non-ICANN domains that caused false positives with ndpi_load_domain_suffixes
Minor hash optimization
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/ndpi_domain_classify.c | 41 | ||||
-rw-r--r-- | src/lib/ndpi_domains.c | 4 | ||||
-rw-r--r-- | src/lib/ndpi_hash.c | 2 |
3 files changed, 30 insertions, 17 deletions
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c index cfb2d7baa..c4a9a692f 100644 --- a/src/lib/ndpi_domain_classify.c +++ b/src/lib/ndpi_domain_classify.c @@ -32,6 +32,8 @@ #define DEBUG_CONTAINS #endif +/* ********************************************************** */ + ndpi_domain_classify* ndpi_domain_classify_alloc() { int i; ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify)); @@ -41,7 +43,7 @@ ndpi_domain_classify* ndpi_domain_classify_alloc() { for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) cat->classes[i].class_id = 0, cat->classes[i].domains = NULL; - + return((ndpi_domain_classify*)cat); } @@ -88,6 +90,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, const char *domain) { u_int32_t i; char *dot; + u_int64_t hash; if((!s) || (!domain)) return(false); @@ -97,18 +100,18 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, dot = strrchr(domain, '.'); - if(dot) { + if(dot) { if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); } - + for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { if(s->classes[i].class_id == class_id) { - break; + break; } else if(s->classes[i].class_id == 0) { s->classes[i].class_id = class_id; s->classes[i].domains = ndpi_bitmap64_alloc(); - + if(!s->classes[i].domains) s->classes[i].class_id = 0; @@ -119,8 +122,14 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS) return(false); - return(ndpi_bitmap64_set(s->classes[i].domains, - ndpi_quick_hash64(domain, strlen(domain)))); + hash = ndpi_quick_hash64(domain, strlen(domain)); + +#ifdef DEBUG_ADD + if(strcmp(domain, "execute-api.eu-north-1.amazonaws.com") == 0) + printf("[add] %s = %d [%llu]\n", domain, s->classes[i].class_id, hash); +#endif + + return(ndpi_bitmap64_set(s->classes[i].domains, hash)); } /* ********************************************************** */ @@ -138,7 +147,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { if(s->classes[i].class_id == class_id) { - break; + break; } else if(s->classes[i].class_id == 0) { s->classes[i].class_id = class_id; s->classes[i].domains = ndpi_bitmap64_alloc(); @@ -160,7 +169,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, while((line = fgets(buf, sizeof(buf), fd)) != NULL) { u_int len; u_int64_t hash; - + if((line[0] == '#') || (line[0] == '\0')) continue; else { @@ -174,7 +183,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, hash = ndpi_quick_hash64(line, strlen(line)); - if(ndpi_bitmap64_set(s->classes[i].domains, hash)) + if(ndpi_bitmap64_set(s->classes[i].domains, hash)) num_added++; } @@ -223,7 +232,7 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, const char *dot, *elem, *prev_elem; *class_id = 0; /* Unknown class_id */ - + if(!hostname || !s) return(hostname); if((len = strlen(hostname)) == 0) return(hostname); if((dot = strrchr(hostname, '.')) == NULL) return(hostname); @@ -247,15 +256,16 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, } elem = prev_elem = hostname; - + while(elem != NULL) { u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem)); - + for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { if(s->classes[i].class_id != 0) { if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) { #ifdef DEBUG_CONTAINS - printf("[contains] %s = %d\n", hostname, s->classes[i].class_id); + printf("[contains] %s = %d [%llu]\n", + hostname, s->classes[i].class_id, hash); #endif *class_id = s->classes[i].class_id; return(return_subprefix ? prev_elem : elem); @@ -270,7 +280,7 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, if(elem == NULL) break; // if(elem == dot) break; - elem = &elem[1]; + elem = &elem[1]; } /* while */ /* Not found */ @@ -286,4 +296,3 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *s, return((*class_id == 0) ? false : true); } - diff --git a/src/lib/ndpi_domains.c b/src/lib/ndpi_domains.c index 3c59e3cd9..e7b283e54 100644 --- a/src/lib/ndpi_domains.c +++ b/src/lib/ndpi_domains.c @@ -47,6 +47,10 @@ int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str, while((line = fgets(buf, sizeof(buf), fd)) != NULL) { u_int offset, len; + + /* Skip private domains */ + if(strstr(line, "// ===END ICANN DOMAINS===")) + break; /* Skip empty lines or comments */ if((line[0] == '\0') || (line[0] == '/') || (line[0] == '\n') || (line[0] == '\r')) diff --git a/src/lib/ndpi_hash.c b/src/lib/ndpi_hash.c index c0d653142..426b81605 100644 --- a/src/lib/ndpi_hash.c +++ b/src/lib/ndpi_hash.c @@ -56,7 +56,7 @@ u_int64_t ndpi_quick_hash64(const char *str, u_int str_len) { for(i=0; i<str_len; i++) h = (h * 177) + str[i]; - h ^= strlen(str); + h ^= str_len; return h; } |