aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--example/ndpiReader.c11
-rw-r--r--src/lib/ndpi_domain_classify.c41
-rw-r--r--src/lib/ndpi_domains.c4
-rw-r--r--src/lib/ndpi_hash.c2
4 files changed, 37 insertions, 21 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index c516d9b13..d4ff82b95 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -595,7 +595,8 @@ static void help(u_int long_help) {
" | 0 - List known protocols\n"
" | 1 - List known categories\n"
" | 2 - List known risks\n"
- " -d | Disable protocol guess (by ip and by port) and use only DPI. It is a shortcut to --cfg=dpi.guess_on_giveup,0\n"
+ " -d | Disable protocol guess (by ip and by port) and use only DPI.\n"
+ " | It is a shortcut to --cfg=dpi.guess_on_giveup,0\n"
" -e <len> | Min human readeable string match len. Default %u\n"
" -q | Quiet mode\n"
" -F | Enable flow stats\n"
@@ -636,8 +637,9 @@ static void help(u_int long_help) {
" -x <domain> | Check domain name [Test only]\n"
" -I | Ignore VLAN id for flow hash calculation\n"
" -A | Dump internal statistics (LRU caches / Patricia trees / Ahocarasick automas / ...\n"
- " -M | Memory allocation stats on data-path (only by the library). It works only on single-thread configuration\n"
- " --cfg=proto,param,value | Configure the specific attribute of this protocol\n"
+ " -M | Memory allocation stats on data-path (only by the library).\n"
+ " | It works only on single-thread configuration\n"
+ " --cfg=proto,param,value | Configure the specific attribute of this protocol\n"
,
human_readeable_string_len,
min_pattern_len, max_pattern_len, max_num_packets_per_flow, max_packet_payload_dissection,
@@ -5703,7 +5705,7 @@ void domainsUnitTest() {
ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all);
assert(ndpi_load_domain_suffixes(ndpi_info_mod, "../lists/public_suffix_list.dat") == 0);
-
+
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.chosei.chiba.jp"), "chosei.chiba.jp") == 0);
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.unipi.it"), "it") == 0);
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "mail.apple.com"), "com") == 0);
@@ -5713,6 +5715,7 @@ void domainsUnitTest() {
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.unipi.it"), "unipi.it") == 0);
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "mail.apple.com"), "apple.com") == 0);
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.bbc.co.uk"), "bbc.co.uk") == 0);
+ assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0);
}
ndpi_exit_detection_module(ndpi_info_mod);
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index cfb2d7baa..c4a9a692f 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -32,6 +32,8 @@
#define DEBUG_CONTAINS
#endif
+/* ********************************************************** */
+
ndpi_domain_classify* ndpi_domain_classify_alloc() {
int i;
ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify));
@@ -41,7 +43,7 @@ ndpi_domain_classify* ndpi_domain_classify_alloc() {
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++)
cat->classes[i].class_id = 0, cat->classes[i].domains = NULL;
-
+
return((ndpi_domain_classify*)cat);
}
@@ -88,6 +90,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s,
const char *domain) {
u_int32_t i;
char *dot;
+ u_int64_t hash;
if((!s) || (!domain))
return(false);
@@ -97,18 +100,18 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s,
dot = strrchr(domain, '.');
- if(dot) {
+ if(dot) {
if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
return(false);
}
-
+
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].class_id == class_id) {
- break;
+ break;
} else if(s->classes[i].class_id == 0) {
s->classes[i].class_id = class_id;
s->classes[i].domains = ndpi_bitmap64_alloc();
-
+
if(!s->classes[i].domains)
s->classes[i].class_id = 0;
@@ -119,8 +122,14 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s,
if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
return(false);
- return(ndpi_bitmap64_set(s->classes[i].domains,
- ndpi_quick_hash64(domain, strlen(domain))));
+ hash = ndpi_quick_hash64(domain, strlen(domain));
+
+#ifdef DEBUG_ADD
+ if(strcmp(domain, "execute-api.eu-north-1.amazonaws.com") == 0)
+ printf("[add] %s = %d [%llu]\n", domain, s->classes[i].class_id, hash);
+#endif
+
+ return(ndpi_bitmap64_set(s->classes[i].domains, hash));
}
/* ********************************************************** */
@@ -138,7 +147,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].class_id == class_id) {
- break;
+ break;
} else if(s->classes[i].class_id == 0) {
s->classes[i].class_id = class_id;
s->classes[i].domains = ndpi_bitmap64_alloc();
@@ -160,7 +169,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
while((line = fgets(buf, sizeof(buf), fd)) != NULL) {
u_int len;
u_int64_t hash;
-
+
if((line[0] == '#') || (line[0] == '\0'))
continue;
else {
@@ -174,7 +183,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
hash = ndpi_quick_hash64(line, strlen(line));
- if(ndpi_bitmap64_set(s->classes[i].domains, hash))
+ if(ndpi_bitmap64_set(s->classes[i].domains, hash))
num_added++;
}
@@ -223,7 +232,7 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s,
const char *dot, *elem, *prev_elem;
*class_id = 0; /* Unknown class_id */
-
+
if(!hostname || !s) return(hostname);
if((len = strlen(hostname)) == 0) return(hostname);
if((dot = strrchr(hostname, '.')) == NULL) return(hostname);
@@ -247,15 +256,16 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s,
}
elem = prev_elem = hostname;
-
+
while(elem != NULL) {
u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem));
-
+
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].class_id != 0) {
if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) {
#ifdef DEBUG_CONTAINS
- printf("[contains] %s = %d\n", hostname, s->classes[i].class_id);
+ printf("[contains] %s = %d [%llu]\n",
+ hostname, s->classes[i].class_id, hash);
#endif
*class_id = s->classes[i].class_id;
return(return_subprefix ? prev_elem : elem);
@@ -270,7 +280,7 @@ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s,
if(elem == NULL) break;
// if(elem == dot) break;
- elem = &elem[1];
+ elem = &elem[1];
} /* while */
/* Not found */
@@ -286,4 +296,3 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
return((*class_id == 0) ? false : true);
}
-
diff --git a/src/lib/ndpi_domains.c b/src/lib/ndpi_domains.c
index 3c59e3cd9..e7b283e54 100644
--- a/src/lib/ndpi_domains.c
+++ b/src/lib/ndpi_domains.c
@@ -47,6 +47,10 @@ int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str,
while((line = fgets(buf, sizeof(buf), fd)) != NULL) {
u_int offset, len;
+
+ /* Stop reading at the end of the ICANN section: the private domains that follow are not loaded */
+ if(strstr(line, "// ===END ICANN DOMAINS==="))
+ break;
/* Skip empty lines or comments */
if((line[0] == '\0') || (line[0] == '/') || (line[0] == '\n') || (line[0] == '\r'))
diff --git a/src/lib/ndpi_hash.c b/src/lib/ndpi_hash.c
index c0d653142..426b81605 100644
--- a/src/lib/ndpi_hash.c
+++ b/src/lib/ndpi_hash.c
@@ -56,7 +56,7 @@ u_int64_t ndpi_quick_hash64(const char *str, u_int str_len) {
for(i=0; i<str_len; i++)
h = (h * 177) + str[i];
- h ^= strlen(str);
+ h ^= str_len;
return h;
}