aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2023-09-02 19:16:40 +0200
committerLuca Deri <deri@ntop.org>2023-09-02 19:16:40 +0200
commit1d480c18e381f393bd25352c6140e9651f3e3a76 (patch)
tree9a9f9b28b301dc917de817330810cbda1ba0bd75 /src
parent854c2d80f1cf5c567a602ce25f8fd7e166bc4c2f (diff)
Reworked domain classification based on binary filters
Diffstat (limited to 'src')
-rw-r--r--src/include/ndpi_api.h15
-rw-r--r--src/include/ndpi_typedefs.h47
-rw-r--r--src/lib/ndpi_binary_bitmap.c171
-rw-r--r--src/lib/ndpi_domain_classify.c331
-rw-r--r--src/lib/ndpi_hash.c22
-rw-r--r--src/lib/ndpi_main.c19
6 files changed, 299 insertions, 306 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index b17bf83b4..68db444ad 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -1802,6 +1802,7 @@ extern "C" {
u_int32_t ndpi_quick_hash(unsigned char *str, u_int str_len);
u_int32_t ndpi_hash_string(char *str);
+ u_int32_t ndpi_rev_hash_string(char *str);
u_int32_t ndpi_hash_string_len(char *str, u_int len);
/* ******************************* */
@@ -2073,11 +2074,13 @@ extern "C" {
void ndpi_domain_classify_free(ndpi_domain_classify *s);
u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s);
bool ndpi_domain_classify_add(ndpi_domain_classify *s,
- u_int16_t classification_id, char *domain);
- u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
- u_int16_t classification_id,
+ u_int8_t class_id, char *domain);
+ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
+ u_int8_t class_id,
char *file_path);
- u_int16_t ndpi_domain_classify_contains(ndpi_domain_classify *s, char *domain);
+ bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
+ u_int8_t *class_id /* out */,
+ char *domain);
/* ******************************* */
@@ -2086,9 +2089,9 @@ extern "C" {
ability to store a category per value (as ndpi_domain_classify)
*/
ndpi_binary_bitmap* ndpi_binary_bitmap_alloc();
- bool ndpi_binary_bitmap_set(ndpi_binary_bitmap *b, u_int32_t value, u_int8_t category);
+ bool ndpi_binary_bitmap_set(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t category);
bool ndpi_binary_bitmap_compress(ndpi_binary_bitmap *b);
- bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int32_t value, u_int8_t *out_category);
+ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t *out_category);
void ndpi_binary_bitmap_free(ndpi_binary_bitmap *b);
u_int32_t ndpi_binary_bitmap_size(ndpi_binary_bitmap *b);
u_int32_t ndpi_binary_bitmap_cardinality(ndpi_binary_bitmap *b);
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 1106f7768..142eed0a2 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -1175,7 +1175,32 @@ typedef struct ndpi_proto {
#define NUM_CUSTOM_CATEGORIES 5
#define CUSTOM_CATEGORY_LABEL_LEN 32
-typedef void ndpi_domain_classify;
+typedef void ndpi_bitmap;
+typedef void ndpi_bitmap_iterator;
+typedef void ndpi_filter;
+
+/* Save memory limiting the key to 56 bit */
+#define SAVE_BINARY_BITMAP_MEMORY
+
+/*
+ * One (value, category) pair stored inside an ndpi_binary_bitmap.
+ *
+ * With SAVE_BINARY_BITMAP_MEMORY defined, both members are bit-fields
+ * declared on a u_int64_t (56 bits for the value, 8 bits for the category),
+ * so only the low 56 bits of an assigned value are kept.
+ * Without it, the value is a full u_int64_t followed by a separate
+ * u_int8_t category.
+ */
+PACK_ON
+struct ndpi_binary_bitmap_entry {
+#ifdef SAVE_BINARY_BITMAP_MEMORY
+ u_int64_t value:56, category:8;
+#else
+ u_int64_t value;
+ u_int8_t category;
+#endif
+} PACK_OFF;
+
+/*
+ * Growable array of ndpi_binary_bitmap_entry.
+ *
+ * num_allocated_entries  capacity of the entries array
+ * num_used_entries       number of entries currently stored
+ * entries                the array itself
+ * is_compressed          cleared by ndpi_binary_bitmap_set(), set by
+ *                        ndpi_binary_bitmap_compress() once the array has
+ *                        been sorted and de-duplicated
+ */
+typedef struct {
+ u_int32_t num_allocated_entries, num_used_entries;
+ struct ndpi_binary_bitmap_entry *entries;
+ bool is_compressed;
+} ndpi_binary_bitmap;
+
+/*
+ * Domain classifier: a wrapper around a single ndpi_binary_bitmap.
+ * ndpi_domain_classify_add() stores a hash of the domain name as the
+ * entry value and the class id as the entry category.
+ */
+typedef struct {
+ ndpi_binary_bitmap *bitmap;
+} ndpi_domain_classify;
#ifdef NDPI_LIB_COMPILATION
@@ -2027,24 +2052,4 @@ typedef int (*ndpi_custom_dga_predict_fctn)(const char* domain, int domain_lengt
/* **************************************** */
-typedef void ndpi_bitmap;
-typedef void ndpi_bitmap_iterator;
-typedef void ndpi_filter;
-
-#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 16
-
-PACK_ON
-struct ndpi_binary_bitmap_entry {
- u_int32_t value;
- u_int8_t category;
-} PACK_OFF;
-
-typedef struct {
- u_int32_t num_allocated_entries, num_used_entries;
- struct ndpi_binary_bitmap_entry *entries;
- bool is_compressed;
-} ndpi_binary_bitmap;
-
-/* **************************************** */
-
#endif /* __NDPI_TYPEDEFS_H__ */
diff --git a/src/lib/ndpi_binary_bitmap.c b/src/lib/ndpi_binary_bitmap.c
new file mode 100644
index 000000000..79f241ae9
--- /dev/null
+++ b/src/lib/ndpi_binary_bitmap.c
@@ -0,0 +1,171 @@
+/*
+ * ndpi_binary_bitmap.c
+ *
+ * Copyright (C) 2011-23 - ntop.org and contributors
+ *
+ * nDPI is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * nDPI is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with nDPI. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+
+#include <stdlib.h>
+#include <errno.h>
+#include <math.h>
+#include <sys/types.h>
+
+#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN
+
+#include "ndpi_config.h"
+#include "ndpi_api.h"
+
+#define NDPI_BINARY_BITMAP_REALLOC_SIZE 4096
+
+// #define PRINT_DUPLICATED_HASHS
+
+/* ********************************************************** */
+
+/*
+ * Allocate an empty binary bitmap with room for
+ * NDPI_BINARY_BITMAP_REALLOC_SIZE zero-initialised entries.
+ *
+ * Returns the new bitmap, or NULL if either allocation fails (nothing is
+ * leaked in that case).
+ */
+ndpi_binary_bitmap* ndpi_binary_bitmap_alloc() {
+  ndpi_binary_bitmap *bitmap;
+  struct ndpi_binary_bitmap_entry *slots;
+
+  bitmap = (ndpi_binary_bitmap*)ndpi_malloc(sizeof(ndpi_binary_bitmap));
+  if(bitmap == NULL)
+    return(NULL);
+
+  slots = (struct ndpi_binary_bitmap_entry*)ndpi_calloc(NDPI_BINARY_BITMAP_REALLOC_SIZE,
+                                                        sizeof(struct ndpi_binary_bitmap_entry));
+  if(slots == NULL) {
+    /* Do not hand out a half-built object */
+    ndpi_free(bitmap);
+    return(NULL);
+  }
+
+  bitmap->entries               = slots;
+  bitmap->num_allocated_entries = NDPI_BINARY_BITMAP_REALLOC_SIZE;
+  bitmap->num_used_entries      = 0;
+  bitmap->is_compressed         = false;
+
+  return(bitmap);
+}
+
+/* ********************************************************** */
+
+/*
+ * Append a (value, category) pair to the bitmap.
+ *
+ * The entry is added at the end of the array without checking for
+ * duplicates; the bitmap is flagged as not compressed so that the next
+ * lookup sorts and de-duplicates it again. The array grows by
+ * NDPI_BINARY_BITMAP_REALLOC_SIZE entries whenever it is full.
+ *
+ * Returns true on success, false if the array could not be grown (the
+ * bitmap is left unchanged in that case).
+ */
+bool ndpi_binary_bitmap_set(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t category) {
+  if(b->num_used_entries >= b->num_allocated_entries) {
+    struct ndpi_binary_bitmap_entry *rc;
+    u_int32_t new_len = b->num_allocated_entries + NDPI_BINARY_BITMAP_REALLOC_SIZE;
+
+    rc = (struct ndpi_binary_bitmap_entry*)ndpi_realloc(b->entries,
+                                                        sizeof(struct ndpi_binary_bitmap_entry)*b->num_allocated_entries,
+                                                        sizeof(struct ndpi_binary_bitmap_entry)*new_len);
+    if(rc == NULL) return(false);
+
+    b->entries = rc, b->num_allocated_entries = new_len;
+  }
+
+#ifdef PRINT_DUPLICATED_HASHS
+  /* Diagnostic only: a library must not write to stdout in normal operation */
+  if(value == 0)
+    printf("[add] ZERO hash !!!\n");
+#endif
+
+  b->entries[b->num_used_entries].value = value;
+  b->entries[b->num_used_entries].category = category;
+  b->num_used_entries++;
+  b->is_compressed = false;
+
+  return(true);
+}
+
+/* ********************************************************** */
+
+/*
+ * qsort()/bsearch() comparator: orders entries by ascending value.
+ * The category is not part of the ordering.
+ */
+static int ndpi_binary_bitmap_entry_compare(const void *_a, const void *_b) {
+  const struct ndpi_binary_bitmap_entry *lhs = (const struct ndpi_binary_bitmap_entry*)_a;
+  const struct ndpi_binary_bitmap_entry *rhs = (const struct ndpi_binary_bitmap_entry*)_b;
+
+  if(lhs->value < rhs->value)
+    return(-1);
+
+  if(lhs->value > rhs->value)
+    return(1);
+
+  return(0);
+}
+
+/* ********************************************************** */
+
+/* Sort and compact memory before searching */
+/*
+ * Sort the entries by value, drop entries whose value duplicates the
+ * previous one (the first occurrence after sorting is kept) and shrink the
+ * array to the number of surviving entries.
+ *
+ * Must run before searching; ndpi_binary_bitmap_isset() calls it on demand.
+ * Always returns true: if the final shrink fails, the original (larger)
+ * buffer is kept, and it already holds the sorted, de-duplicated data.
+ */
+bool ndpi_binary_bitmap_compress(ndpi_binary_bitmap *b) {
+  if(b->num_used_entries > 0) {
+    struct ndpi_binary_bitmap_entry *shrunk;
+    u_int64_t old_value;
+    u_int32_t i, new_len = 1;
+
+    if(b->num_used_entries > 1)
+      qsort(b->entries, b->num_used_entries,
+            sizeof(struct ndpi_binary_bitmap_entry),
+            ndpi_binary_bitmap_entry_compare);
+
+    /* Now remove duplicates, compacting the survivors towards the front */
+    old_value = b->entries[0].value;
+
+    for(i=1; i<b->num_used_entries; i++) {
+      if(b->entries[i].value != old_value) {
+        if(new_len != i)
+          b->entries[new_len] = b->entries[i];
+
+        old_value = b->entries[i].value;
+        new_len++;
+      } else {
+#ifdef PRINT_DUPLICATED_HASHS
+        printf("Skipping duplicate hash %llu [id: %u/%u]\n",
+               (unsigned long long)b->entries[i].value, i, b->num_used_entries);
+#endif
+      }
+    }
+
+    /*
+     * Give back the unused tail. Never overwrite b->entries with a failed
+     * result: that would lose the data and leave a NULL array behind a
+     * non-zero entry count.
+     * NOTE(review): assumes ndpi_realloc() leaves the original buffer valid
+     * on failure, as ndpi_binary_bitmap_set() already relies on - confirm.
+     */
+    shrunk = (struct ndpi_binary_bitmap_entry*)
+      ndpi_realloc(b->entries,
+                   sizeof(struct ndpi_binary_bitmap_entry)*b->num_allocated_entries,
+                   sizeof(struct ndpi_binary_bitmap_entry)*new_len);
+
+    if(shrunk != NULL)
+      b->entries = shrunk, b->num_allocated_entries = new_len;
+
+    b->num_used_entries = new_len;
+  }
+
+  b->is_compressed = true;
+  return(true);
+}
+
+/* ********************************************************** */
+
+/*
+ * Look up a value in the bitmap.
+ *
+ * The bitmap is compressed (sorted and de-duplicated) first if needed, then
+ * searched with bsearch(). On a hit the stored category is written to
+ * *out_category and true is returned; on a miss *out_category is left
+ * untouched and false is returned.
+ */
+bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t *out_category) {
+  struct ndpi_binary_bitmap_entry key, *hit;
+
+  if(!b->is_compressed)
+    ndpi_binary_bitmap_compress(b);
+
+  if(b->num_used_entries == 0)
+    return(false);
+
+  /* Only the value takes part in the comparison */
+  key.value = value;
+
+  hit = (struct ndpi_binary_bitmap_entry*)bsearch(&key, b->entries,
+                                                  b->num_used_entries,
+                                                  sizeof(struct ndpi_binary_bitmap_entry),
+                                                  ndpi_binary_bitmap_entry_compare);
+  if(hit == NULL)
+    return(false);
+
+  *out_category = hit->category;
+  return(true);
+}
+
+/* ********************************************************** */
+
+/*
+ * Release a bitmap and its entries array.
+ *
+ * Passing NULL is a no-op. This matters for error paths such as the one in
+ * ndpi_domain_classify_alloc(), which frees a classifier whose bitmap
+ * allocation has just failed.
+ */
+void ndpi_binary_bitmap_free(ndpi_binary_bitmap *b) {
+  if(b == NULL)
+    return;
+
+  ndpi_free(b->entries);
+  ndpi_free(b);
+}
+
+/* ********************************************************** */
+
+/*
+ * Bytes taken by the bitmap header plus the entries currently in use
+ * (allocated-but-unused slots are not counted).
+ */
+u_int32_t ndpi_binary_bitmap_size(ndpi_binary_bitmap *b) {
+  size_t entries_bytes = b->num_used_entries * sizeof(struct ndpi_binary_bitmap_entry);
+
+  return(sizeof(ndpi_binary_bitmap) + entries_bytes);
+}
+
+/* ********************************************************** */
+
+/*
+ * Number of entries currently stored. Duplicates are only removed by
+ * ndpi_binary_bitmap_compress(), so before compression they are counted.
+ */
+u_int32_t ndpi_binary_bitmap_cardinality(ndpi_binary_bitmap *b) {
+  u_int32_t stored = b->num_used_entries;
+
+  return(stored);
+}
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index f0e6ff7cb..986ebbf07 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -1,5 +1,5 @@
/*
- * ndpi_domain_bitmap.c
+ * ndpi_domain_classify.c
*
* Copyright (C) 2011-23 - ntop.org and contributors
*
@@ -24,298 +24,77 @@
#include <math.h>
#include <sys/types.h>
-#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN
-
-#define END_OF_TOKENS_DELIMITER 0x12345678
-#define NUM_DOMAIN_BITMAPS 8
-#define NUM_DOMAIN_BITMAPS_THRESHOLD (NUM_DOMAIN_BITMAPS-1)
-
#include "ndpi_config.h"
#include "ndpi_api.h"
-typedef struct {
- ndpi_bitmap *bitmap[NUM_DOMAIN_BITMAPS];
-} ndpi_domain_search;
-
-typedef struct {
- u_int16_t class_id;
- ndpi_domain_search *domains;
-} ndpi_domain_classify_t;
-
-typedef struct {
- ndpi_domain_classify_t *class[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
-} ndpi_domain_classifications_t;
-
// #define DEBUG_ADD
// #define DEBUG_CONTAINS
/* ********************************************************** */
-static void ndpi_domain_search_free(ndpi_domain_search *search) {
- u_int16_t i;
-
- for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
- if(search->bitmap[i] == NULL)
- break;
-
- ndpi_bitmap_free(search->bitmap[i]);
- }
-
+void ndpi_domain_classify_free(ndpi_domain_classify *search) {
+ ndpi_binary_bitmap_free(search->bitmap);
ndpi_free(search);
}
/* ********************************************************** */
-static ndpi_domain_search* ndpi_domain_search_alloc() {
- ndpi_domain_search *search = (ndpi_domain_search*)ndpi_calloc(NUM_DOMAIN_BITMAPS, sizeof(ndpi_domain_search));
- u_int16_t i;
+ndpi_domain_classify* ndpi_domain_classify_alloc() {
+ ndpi_domain_classify *search = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify));
if(!search) return(NULL);
- for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
- if((search->bitmap[i] = ndpi_bitmap_alloc()) == NULL)
- goto toobad;
- }
+ if((search->bitmap = ndpi_binary_bitmap_alloc()) == NULL)
+ goto toobad;
return(search);
toobad:
- ndpi_domain_search_free(search);
+ ndpi_domain_classify_free(search);
return(NULL);
}
/* ********************************************************** */
-static u_int32_t ndpi_domain_search_size(ndpi_domain_search *search) {
- u_int32_t i, total_len = 0;
-
- for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
- char *buf;
-
- total_len += ndpi_bitmap_serialize(search->bitmap[i], &buf);
- ndpi_free(buf);
- }
-
- return(total_len);
-}
-
-/* ********************************************************** */
-
-/* NOTE: domain will be modified: copy it if necessary */
-static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) {
- char *elem;
- u_int32_t bitmap_id = 0, len, hsum = 0;
- bool quit = false;
-
- if(domain == NULL) return(false);
- if((len = strlen(domain)) == 0) return(false);
-
- len--;
- while((len > 0)
- && ((domain[len] == '.')
- || (domain[len] == '\n')
- || (domain[len] == '\r'))
- )
- domain[len--] = '\0';
-
- if(domain[0] == '.') ++domain;
-
- elem = strrchr(domain, '.');
- while(elem) {
- u_int32_t h;
-
- if(elem[0] == '.') elem = &elem[1];
-
- h = ndpi_hash_string(elem);
-
- if(elem == domain) {
- /* We're adding the beginning of the domain, hence the last token before quitting */
- h += END_OF_TOKENS_DELIMITER;
- }
-
- ndpi_bitmap_set(search->bitmap[bitmap_id], h + hsum);
-
- bitmap_id++, hsum += h;
-
- if(quit)
- break;
-
- if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
- elem = domain, quit = true; /* Hash the rest of the word */
- else {
- elem[-1] = '\0';
- elem = strrchr(domain, '.');
-
- if(elem == NULL)
- elem = domain, quit = true;
- }
- }
-
- return(bitmap_id);
-}
-
-/* ********************************************************** */
-
-static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain) {
- char *elem;
- u_int32_t bitmap_id = 0, hsum = 0;
- bool quit = false;
-
- if((elem = strrchr(domain, '.')) == NULL)
- return(false); /* This does not look like a domain */
-
- while(elem) {
- u_int32_t h;
-
- if(elem[0] == '.') elem = &elem[1];
-
- h = ndpi_hash_string(elem);
-
- if(!ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) {
- /* Exact match does not work, so let's see if a partial match works instead */
-
- /* We're adding the beginning of the domain, hence the last token before quitting */
- h += END_OF_TOKENS_DELIMITER;
-
- return(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum));
- }
-
- bitmap_id++, hsum += h;
-
- if(quit)
- break;
-
- if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
- elem = domain, quit = true; /* Hash the rest of the word */
- else {
- elem[-1] = '\0';
- elem = strrchr(domain, '.');
-
- if(elem == NULL)
- elem = domain, quit = true;
- }
- }
-
- return(true);
-}
-
-/* ********************************************************** */
-/* ********************************************************** */
-
-ndpi_domain_classify* ndpi_domain_classify_alloc() {
- ndpi_domain_classify_t *cat = (ndpi_domain_classify_t*)ndpi_calloc(1, sizeof(ndpi_domain_classifications_t));
-
- return((ndpi_domain_classify*)cat);
-}
-
-/* ********************************************************** */
-
-void ndpi_domain_classify_free(ndpi_domain_classify *_s) {
- u_int32_t i;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
-
- for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- ndpi_domain_search_free(s->class[i]->domains);
- ndpi_free(s->class[i]);
- } else
- break;
- }
-
- ndpi_free(s);
-}
-
-/* ********************************************************** */
-
-u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) {
- u_int32_t i, tot_len = sizeof(ndpi_domain_classify_t);
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
-
- for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- tot_len += ndpi_domain_search_size(s->class[i]->domains) + sizeof(ndpi_domain_classify_t);
- } else
- break;
- }
-
- return(tot_len);
+u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *c) {
+ return(sizeof(ndpi_domain_classify)+ndpi_binary_bitmap_size(c->bitmap));
}
/* ********************************************************** */
-bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
- u_int16_t class_id,
+bool ndpi_domain_classify_add(ndpi_domain_classify *c,
+ u_int8_t class_id,
char *domain) {
- u_int32_t i;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
- char buf[256], *dot = strrchr(domain, '.');
-
+ u_int64_t hash1, hash2, hash;
+ char *dot = strrchr(domain, '.');
+
if(!dot) return(false);
if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
return(false);
-
- for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- if(s->class[i]->class_id == class_id) {
- break;
- }
- } else {
- s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));
-
- if(s->class[i] == NULL)
- return(false);
-
- s->class[i]->class_id = class_id;
- s->class[i]->domains = ndpi_domain_search_alloc();
- break;
- }
- }
- if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
- return(false);
-
- snprintf(buf, sizeof(buf), "%s", domain);
+ /* Skip heading dots */
+ while(domain[0] == '.') domain++;
+
+ hash1 = ndpi_hash_string(domain), hash2 = ndpi_rev_hash_string(domain);
+ hash = (hash1 << 32) | hash2;
#ifdef DEBUG_ADD
- printf("[add] %s @ %u\n", domain, class_id);
+ printf("[add] %s @ %u [hash: %llu]\n", domain, class_id, hash);
#endif
- return(ndpi_domain_search_add(s->class[i]->domains, buf));
+ return(ndpi_binary_bitmap_set(c->bitmap, hash, class_id));
}
/* ********************************************************** */
-u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
- u_int16_t class_id,
+u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_c,
+ u_int8_t class_id,
char *file_path) {
- u_int32_t i, num_added = 0;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+ u_int32_t num_added = 0;
char buf[256];
FILE *fd;
char *line;
- for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- if(s->class[i]->class_id == class_id) {
- break;
- }
- } else {
- s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));
-
- if(s->class[i] == NULL)
- return(false);
-
- s->class[i]->class_id = class_id;
- s->class[i]->domains = ndpi_domain_search_alloc();
- break;
- }
- }
-
- if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
- return(false);
-
- /* *************************************** */
-
fd = fopen(file_path, "r");
if(fd == NULL)
return(false);
@@ -333,8 +112,8 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
else
line[len] = '\0';
}
-
- if(ndpi_domain_search_add(s->class[i]->domains, line))
+
+ if(ndpi_domain_classify_add(_c, class_id, line))
num_added++;
}
@@ -359,16 +138,16 @@ static bool is_valid_domain_char(u_char c) {
/* ********************************************************** */
-u_int16_t ndpi_domain_classify_contains(ndpi_domain_classify *_s,
- char *domain) {
- u_int32_t i, len;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
- char *dot;
+bool ndpi_domain_classify_contains(ndpi_domain_classify *c,
+ u_int8_t *class_id /* out */,
+ char *domain) {
+ u_int32_t len;
+ char *dot, *elem;
- if(!domain) return(0);
- if((len = strlen(domain)) == 0) return(0);
- if((dot = strrchr(domain, '.')) == NULL) return(0);
- if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(0);
+ if(!domain) return(false);
+ if((len = strlen(domain)) == 0) return(false);
+ if((dot = strrchr(domain, '.')) == NULL) return(false);
+ if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false);
/* This is a number or a numeric IP or similar */
if(isdigit(domain[len-1]) && isdigit(domain[0])) {
@@ -376,7 +155,7 @@ u_int16_t ndpi_domain_classify_contains(ndpi_domain_classify *_s,
printf("[contains] %s INVALID\n", domain);
#endif
- return(0);
+ return(false);
}
if(!is_valid_domain_char(domain[0])) {
@@ -384,28 +163,38 @@ u_int16_t ndpi_domain_classify_contains(ndpi_domain_classify *_s,
printf("[contains] %s INVALID\n", domain);
#endif
- return(0);
+ return(false);
}
-
- for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- char buf[256];
-
- snprintf(buf, sizeof(buf), "%s", domain);
-
- if(ndpi_domain_search_contains(s->class[i]->domains, buf)) {
+
+ elem = domain;
+
+ while(true) {
+ u_int64_t hash1, hash2, hash;
+
+ hash1 = ndpi_hash_string(elem), hash2 = ndpi_rev_hash_string(elem);
+ hash = (hash1 << 32) | hash2;
+
#ifdef DEBUG_CONTAINS
- printf("[contains] %s = %d\n", domain, s->class[i]->class_id);
+ printf("[contains] Searching %s [hash: %llu]\n", elem, hash);
#endif
- return(s->class[i]->class_id);
- }
+
+ if(ndpi_binary_bitmap_isset(c->bitmap, hash, class_id)) {
+#ifdef DEBUG_CONTAINS
+ printf("[contains] %s = %d\n", domain, *class_id);
+#endif
+ return(true);
}
+
+ if((elem = strchr(elem, '.')) == NULL)
+ break;
+ else
+ elem = &elem[1];
}
#ifdef DEBUG_CONTAINS
printf("[contains] %s NOT FOUND\n", domain);
#endif
- return(0);
+ return(false);
}
diff --git a/src/lib/ndpi_hash.c b/src/lib/ndpi_hash.c
index 4595368ef..8be352e03 100644
--- a/src/lib/ndpi_hash.c
+++ b/src/lib/ndpi_hash.c
@@ -62,6 +62,28 @@ u_int32_t ndpi_hash_string(char *str) {
/* ******************************************************************** */
+/*
+ * Hash a NUL-terminated string walking it from the last character to the
+ * first, using the same add/shift/xor mixing steps on every character and
+ * a final avalanche step.
+ *
+ * Returns 0 for the empty string.
+ */
+u_int32_t ndpi_rev_hash_string(char *str) {
+  u_int32_t hash = 0;
+  size_t pos = strlen(str); /* size_t: no narrowing of the length */
+
+  if(pos == 0) return(0);
+
+  while(pos > 0) {
+    hash += str[--pos];
+    hash += (hash << 10);
+    hash ^= (hash >> 6);
+  }
+
+  hash += (hash << 3);
+  hash ^= (hash >> 11);
+  hash += (hash << 15);
+
+  return(hash);
+}
+
+/* ******************************************************************** */
+
/* Same as above but for strings with an explicit length */
u_int32_t ndpi_hash_string_len(char *str, u_int len) {
u_int32_t hash, i;
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 8b1c01ae5..c5a085be6 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -3403,21 +3403,19 @@ int ndpi_match_custom_category(struct ndpi_detection_module_struct *ndpi_str,
return(id != NDPI_PROTOCOL_UNKNOWN ? 0 : -1);
#else
char buf[128];
- u_int16_t rc;
+ u_int8_t class_id;
u_int max_len = sizeof(buf)-1;
if(name_len > max_len) name_len = max_len;
strncpy(buf, name, name_len);
buf[name_len] = '\0';
- rc = ndpi_domain_classify_contains(ndpi_str->custom_categories.sc_hostnames, buf);
-
- if(rc == 0)
- return(-1); /* Not found */
- else {
- *category = (ndpi_protocol_category_t)rc;
+ if(ndpi_domain_classify_contains(ndpi_str->custom_categories.sc_hostnames,
+ &class_id, buf)) {
+ *category = (ndpi_protocol_category_t)class_id;
return(0);
- }
+ } else
+ return(-1); /* Not found */
#endif
}
@@ -4202,7 +4200,12 @@ int ndpi_load_category_file(struct ndpi_detection_module_struct *ndpi_str,
if((len <= 1) || (line[0] == '#'))
continue;
+ else
+ len--;
+ while((line[len] == '\n') || (line[len] == '\r'))
+ line[len--] = '\0';
+
if(ndpi_load_category(ndpi_str, line, category_id, NULL) > 0)
num_loaded++;
}