diff options
-rw-r--r-- | example/ndpiReader.c | 11 | ||||
-rw-r--r-- | src/lib/ndpi_domain_classify.c | 41 | ||||
-rw-r--r-- | src/lib/ndpi_domains.c | 4 | ||||
-rw-r--r-- | src/lib/ndpi_hash.c | 2 |
4 files changed, 37 insertions, 21 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index c516d9b13..d4ff82b95 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -595,7 +595,8 @@ static void help(u_int long_help) { " | 0 - List known protocols\n" " | 1 - List known categories\n" " | 2 - List known risks\n" - " -d | Disable protocol guess (by ip and by port) and use only DPI. It is a shortcut to --cfg=dpi.guess_on_giveup,0\n" + " -d | Disable protocol guess (by ip and by port) and use only DPI.\n" + " | It is a shortcut to --cfg=dpi.guess_on_giveup,0\n" " -e <len> | Min human readeable string match len. Default %u\n" " -q | Quiet mode\n" " -F | Enable flow stats\n" @@ -636,8 +637,9 @@ static void help(u_int long_help) { " -x <domain> | Check domain name [Test only]\n" " -I | Ignore VLAN id for flow hash calculation\n" " -A | Dump internal statistics (LRU caches / Patricia trees / Ahocarasick automas / ...\n" - " -M | Memory allocation stats on data-path (only by the library). It works only on single-thread configuration\n" - " --cfg=proto,param,value | Configure the specific attribute of this protocol\n" + " -M | Memory allocation stats on data-path (only by the library).\n" + " | It works only on single-thread configuration\n" + " --cfg=proto,param,value | Configure the specific attribute of this protocol\n" , human_readeable_string_len, min_pattern_len, max_pattern_len, max_num_packets_per_flow, max_packet_payload_dissection, @@ -5703,7 +5705,7 @@ void domainsUnitTest() { ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all); assert(ndpi_load_domain_suffixes(ndpi_info_mod, "../lists/public_suffix_list.dat") == 0); - + assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.chosei.chiba.jp"), "chosei.chiba.jp") == 0); assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.unipi.it"), "it") == 0); assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "mail.apple.com"), "com") == 0); @@ -5713,6 +5715,7 @@ void domainsUnitTest() { assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.unipi.it"), "unipi.it") == 0); assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "mail.apple.com"), "apple.com") == 0); assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.bbc.co.uk"), "bbc.co.uk") == 0); + assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0); } ndpi_exit_detection_module(ndpi_info_mod); diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c index cfb2d7baa..c4a9a692f 100644 --- a/src/lib/ndpi_domain_classify.c +++ b/src/lib/ndpi_domain_classify.c @@ -32,6 +32,8 @@ #define DEBUG_CONTAINS #endif +/* ********************************************************** */ + ndpi_domain_classify* ndpi_domain_classify_alloc() { int i; ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify)); @@ -41,7 +43,7 @@ ndpi_domain_classify* ndpi_domain_classify_alloc() { for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) cat->classes[i].class_id = 0, cat->classes[i].domains = NULL; - + return((ndpi_domain_classify*)cat); } @@ -88,6 +90,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, const char *domain) { u_int32_t i; char *dot; + u_int64_t hash; if((!s) || (!domain)) return(false); @@ -97,18 +100,18 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, dot = strrchr(domain, '.'); - if(dot) { + if(dot) { if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); } - + for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { if(s->classes[i].class_id == class_id) { - break; + break; } else if(s->classes[i].class_id == 0) { s->classes[i].class_id = class_id; s->classes[i].domains = ndpi_bitmap64_alloc(); - + if(!s->classes[i].domains) s->classes[i].class_id = 0; @@ -119,8 +122,14 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s, if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS) return(false); - return(ndpi_bitmap64_set(s->classes[i].domains, - ndpi_quick_hash64(domain, strlen(domain)))); + hash = ndpi_quick_hash64(domain, strlen(domain)); + +#ifdef DEBUG_ADD + if(strcmp(domain, "execute-api.eu-north-1.amazonaws.com") == 0) + printf("[add] %s = %d [%llu]\n", domain, s->classes[i].class_id, hash); +#endif + + return(ndpi_bitmap64_set(s->classes[i].domains, hash)); } /* ********************************************************** */ @@ -138,7 +147,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { if(s->classes[i].class_id == class_id) { - break; + break; } else if(s->classes[i].class_id == 0) { s->classes[i].class_id = class_id; s->classes[i].domains = ndpi_bitmap64_alloc(); @@ -160,7 +169,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, while((line = fgets(buf, sizeof(buf), fd)) != NULL) { u_int len; u_int64_t hash; - + if((line[0] == '#') || (line[0] == '\0')) continue; else { @@ -174,7 +183,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, hash = ndpi_quick_hash64(line, strlen(line)); - if(ndpi_bitmap64_set(s->classes[i].domains, hash)) + if(ndpi_bitmap64_set(s->classes[i].domains, hash)) num_added++; } @@ -223,7 +232,7 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, const char *dot, *elem, *prev_elem; *class_id = 0; /* Unknown class_id */ - + if(!hostname || !s) return(hostname); if((len = strlen(hostname)) == 0) return(hostname); if((dot = strrchr(hostname, '.')) == NULL) return(hostname); @@ -247,15 +256,16 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, } elem = prev_elem = hostname; - + while(elem != NULL) { u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem)); - + for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { if(s->classes[i].class_id != 0) { if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) { #ifdef DEBUG_CONTAINS - printf("[contains] %s = %d\n", hostname, s->classes[i].class_id); + printf("[contains] %s = %d [%llu]\n", + hostname, s->classes[i].class_id, hash); #endif *class_id = s->classes[i].class_id; return(return_subprefix ? prev_elem : elem); @@ -270,7 +280,7 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s, if(elem == NULL) break; // if(elem == dot) break; - elem = &elem[1]; + elem = &elem[1]; } /* while */ /* Not found */ @@ -286,4 +296,3 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *s, return((*class_id == 0) ? false : true); } - diff --git a/src/lib/ndpi_domains.c b/src/lib/ndpi_domains.c index 3c59e3cd9..e7b283e54 100644 --- a/src/lib/ndpi_domains.c +++ b/src/lib/ndpi_domains.c @@ -47,6 +47,10 @@ int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str, while((line = fgets(buf, sizeof(buf), fd)) != NULL) { u_int offset, len; + + /* Skip private domains */ + if(strstr(line, "// ===END ICANN DOMAINS===")) + break; /* Skip empty lines or comments */ if((line[0] == '\0') || (line[0] == '/') || (line[0] == '\n') || (line[0] == '\r')) diff --git a/src/lib/ndpi_hash.c b/src/lib/ndpi_hash.c index c0d653142..426b81605 100644 --- a/src/lib/ndpi_hash.c +++ b/src/lib/ndpi_hash.c @@ -56,7 +56,7 @@ u_int64_t ndpi_quick_hash64(const char *str, u_int str_len) { for(i=0; i<str_len; i++) h = (h * 177) + str[i]; - h ^= strlen(str); + h ^= str_len; return h; } |