aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Luca Deri <deri@ntop.org> 2023-09-04 12:53:42 +0200
committer Luca Deri <deri@ntop.org> 2023-09-04 12:53:42 +0200
commit 4f2ce2d43b24bd86eabbed6127c090b3affa0d01 (patch)
tree fd823af265167b18e4db7d526bba477b7d8e610e /src
parent f0dc3347ec92a55c16b7033e1b7f2890892b3094 (diff)
Added ndpi_murmur_hash to the nDPI API
Diffstat (limited to 'src')
-rw-r--r-- src/include/ndpi_api.h 1
-rw-r--r-- src/lib/ndpi_domain_classify.c 43
-rw-r--r-- src/lib/ndpi_hash.c 9
3 files changed, 41 insertions, 12 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 68db444ad..82c7a307b 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -1804,6 +1804,7 @@ extern "C" {
u_int32_t ndpi_hash_string(char *str);
u_int32_t ndpi_rev_hash_string(char *str);
u_int32_t ndpi_hash_string_len(char *str, u_int len);
+ u_int32_t ndpi_murmur_hash(char *str, u_int str_len);
/* ******************************* */
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index f5e6752c1..3b458d665 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -52,7 +52,7 @@ ndpi_domain_classify* ndpi_domain_classify_alloc() {
if(!search) return(NULL);
if((search->bitmap = ndpi_binary_bitmap_alloc()) == NULL)
- goto toobad;
+ goto toobad;
return(search);
@@ -74,19 +74,27 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *c,
char *domain) {
u_int64_t hash1, hash2, hash;
char *dot = strrchr(domain, '.');
-
+
+#ifdef DEBUG_ADD
+ printf("[add] Trying to add %s\n", domain);
+#endif
+
if(!dot) return(false);
if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
return(false);
/* Skip heading dots */
while(domain[0] == '.') domain++;
-
+
hash1 = ndpi_hash_string(domain), hash2 = ndpi_rev_hash_string(domain);
hash = (hash1 << 32) | hash2;
#ifdef DEBUG_ADD
printf("[add] %s @ %u [hash: %llu]\n", domain, class_id, hash);
+
+ if(ndpi_binary_bitmap_isset(c->bitmap, hash, class_id)) {
+ printf("[add] False positive %s @ %u [hash: %llu]\n", domain, class_id, hash);
+ }
#endif
return(ndpi_binary_bitmap_set(c->bitmap, hash, class_id));
@@ -119,7 +127,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_c,
else
line[len] = '\0';
}
-
+
if(ndpi_domain_classify_add(_c, class_id, line))
num_added++;
}
@@ -164,7 +172,7 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *c,
return(false);
}
-
+
if(!is_valid_domain_char(domain[0])) {
#ifdef DEBUG_CONTAINS
printf("[contains] %s INVALID\n", domain);
@@ -180,7 +188,7 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *c,
hash1 = ndpi_hash_string(elem), hash2 = ndpi_rev_hash_string(elem);
hash = (hash1 << 32) | hash2;
-
+
#ifdef DEBUG_CONTAINS
printf("[contains] Searching %s [hash: %llu]\n", elem, hash);
#endif
@@ -310,8 +318,19 @@ static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) {
if(elem == domain) {
/* We're adding the beginning of the domain, hence the last token before quitting */
h += END_OF_TOKENS_DELIMITER;
+
+#ifdef DEBUG_ADD
+ if(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum))
+ printf("[add] False positive while adding %s (%s) [%u][bitmap_id: %u]\n",
+ elem, domain, h + hsum, bitmap_id);
+#endif
}
+#ifdef DEBUG_ADD
+ printf("[add] Trying to add %s [%s][%u][bitmap_id: %u]\n",
+ elem, domain, h + hsum, bitmap_id);
+#endif
+
ndpi_bitmap_set(search->bitmap[bitmap_id], h + hsum);
bitmap_id++, hsum += h;
@@ -342,7 +361,7 @@ static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain
if((elem = strrchr(domain, '.')) == NULL)
return(false); /* This does not look like a domain */
-
+
while(elem) {
u_int32_t h;
@@ -432,7 +451,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
if(!dot) return(false);
if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
return(false);
-
+
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->class[i] != NULL) {
if(s->class[i]->class_id == class_id) {
@@ -558,7 +577,7 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s,
return(false);
}
-
+
if(!is_valid_domain_char(domain[0])) {
#ifdef DEBUG_CONTAINS
printf("[contains] %s INVALID\n", domain);
@@ -566,13 +585,13 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s,
return(false);
}
-
+
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->class[i] != NULL) {
char buf[256];
-
+
snprintf(buf, sizeof(buf), "%s", domain);
-
+
if(ndpi_domain_search_contains(s->class[i]->domains, buf)) {
#ifdef DEBUG_CONTAINS
printf("[contains] %s = %d\n", domain, s->class[i]->class_id);
diff --git a/src/lib/ndpi_hash.c b/src/lib/ndpi_hash.c
index 8be352e03..8d78749fd 100644
--- a/src/lib/ndpi_hash.c
+++ b/src/lib/ndpi_hash.c
@@ -25,6 +25,15 @@
#include "ndpi_config.h"
#include "ndpi_api.h"
+#include "third_party/include/MurmurHash3.h"
+
+/* ******************************************************************** */
+
+/* Thin wrapper around MurmurHash() from third_party/include/MurmurHash3.h */
+u_int32_t ndpi_murmur_hash(char *str, u_int str_len) {
+ return(MurmurHash((void*)str, str_len, 0x87654321));
+}
+
/* ******************************************************************** */
/* Based on djb2 hash - http://www.cse.yorku.ca/~oz/hash.html */