about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Luca Deri <deri@ntop.org> 2023-09-05 17:03:20 +0200
committer Luca Deri <deri@ntop.org> 2023-09-05 17:03:20 +0200
commit 978df906b38c26a359b2d91089e70e133cc7502e (patch)
tree ac44a768d718f0a3265b3d11c01ab9305adc34dd /src
parent 0080d0d092e245aa865bf757e682496e97257d25 (diff)
Improved classification further reducing memory used
Diffstat (limited to 'src')
-rw-r--r-- src/include/ndpi_api.h 14
-rw-r--r-- src/include/ndpi_typedefs.h 7
-rw-r--r-- src/lib/ndpi_binary_bitmap.c 36
-rw-r--r-- src/lib/ndpi_bitmap64.c 153
-rw-r--r-- src/lib/ndpi_content_match.c.inc 1
-rw-r--r-- src/lib/ndpi_domain_classify.c 482
6 files changed, 224 insertions, 469 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 76ff9fadd..612847da4 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -2037,12 +2037,13 @@ extern "C" {
on https://github.com/FastFilter/xor_singleheader/tree/master
*/
- ndpi_bitmap64* ndpi_bitmap64_alloc_size(u_int32_t size);
- void ndpi_bitmap64_free(ndpi_bitmap64* b);
- void ndpi_bitmap64_set(ndpi_bitmap64* b, u_int64_t value);
- bool ndpi_bitmap64_isset(ndpi_bitmap64* b, u_int64_t value);
+ ndpi_bitmap64* ndpi_bitmap64_alloc();
+ bool ndpi_bitmap64_set(ndpi_bitmap64 *b, u_int64_t value);
+ bool ndpi_bitmap64_compress(ndpi_bitmap64 *b);
+ bool ndpi_bitmap64_isset(ndpi_bitmap64 *b, u_int64_t value);
+ void ndpi_bitmap64_free(ndpi_bitmap64 *b);
u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *b);
-
+
/* ******************************* */
/*
Bloom-filter on steroids based on ndpi_bitmap
@@ -2110,6 +2111,9 @@ extern "C" {
void ndpi_binary_bitmap_free(ndpi_binary_bitmap *b);
u_int32_t ndpi_binary_bitmap_size(ndpi_binary_bitmap *b);
u_int32_t ndpi_binary_bitmap_cardinality(ndpi_binary_bitmap *b);
+
+ /* ******************************* */
+
/* ******************************* */
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 553440a2b..53535a441 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -1199,8 +1199,13 @@ typedef struct {
bool is_compressed;
} ndpi_binary_bitmap;
+#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 16
+
typedef struct {
- ndpi_binary_bitmap *bitmap;
+ struct {
+ u_int16_t class_id;
+ ndpi_bitmap64 *domains;
+ } classes[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
} ndpi_domain_classify;
#ifdef NDPI_LIB_COMPILATION
diff --git a/src/lib/ndpi_binary_bitmap.c b/src/lib/ndpi_binary_bitmap.c
index 4360e574f..184bb99df 100644
--- a/src/lib/ndpi_binary_bitmap.c
+++ b/src/lib/ndpi_binary_bitmap.c
@@ -47,6 +47,10 @@ ndpi_binary_bitmap* ndpi_binary_bitmap_alloc() {
return(NULL);
}
+#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY
+ rc->bitmap = NULL;
+#endif
+
rc->is_compressed = false;
return(rc);
@@ -131,6 +135,22 @@ bool ndpi_binary_bitmap_compress(ndpi_binary_bitmap *b) {
}
b->is_compressed = true;
+
+#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY
+ if(b->bitmap != NULL) ndpi_bitmap64_free(b->bitmap);
+ b->bitmap = ndpi_bitmap64_alloc_size(b->num_used_entries);
+
+ u_int64_t *values = (u_int64_t*)ndpi_malloc(sizeof(u_int64_t)*b->num_used_entries);
+
+ if(values) {
+ for(i=0; i<b->num_used_entries; i++)
+ values[i] = b->entries[i].value;
+
+ ndpi_bitmap64_multiset(b->bitmap, values, b->num_used_entries);
+ ndpi_free(values);
+ }
+#endif
+
return(true);
}
@@ -140,10 +160,14 @@ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t *
if(!b->is_compressed) ndpi_binary_bitmap_compress(b);
if(b->num_used_entries > 0) {
+#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY
+ return(ndpi_bitmap64_isset(b->bitmap, value));
+#else
struct ndpi_binary_bitmap_entry *rc;
struct ndpi_binary_bitmap_entry tofind;
- tofind.value = value; rc = (struct ndpi_binary_bitmap_entry*)bsearch(&tofind, b->entries,
+ tofind.value = value;
+ rc = (struct ndpi_binary_bitmap_entry*)bsearch(&tofind, b->entries,
b->num_used_entries,
sizeof(struct ndpi_binary_bitmap_entry),
ndpi_binary_bitmap_entry_compare);
@@ -151,6 +175,7 @@ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t *
*out_category = rc->category;
return(rc == NULL ? false : true);
+#endif
} else
return(false);
}
@@ -159,13 +184,22 @@ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t *
void ndpi_binary_bitmap_free(ndpi_binary_bitmap *b) {
ndpi_free(b->entries);
+
+#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY
+ if(b->bitmap != NULL) ndpi_bitmap64_free(b->bitmap);
+#endif
+
ndpi_free(b);
}
/* ********************************************************** */
u_int32_t ndpi_binary_bitmap_size(ndpi_binary_bitmap *b) {
+#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY
+ return(sizeof(ndpi_binary_bitmap) + ndpi_bitmap64_size(b->bitmap));
+#else
return(sizeof(ndpi_binary_bitmap) + b->num_used_entries * sizeof(struct ndpi_binary_bitmap_entry));
+#endif
}
/* ********************************************************** */
diff --git a/src/lib/ndpi_bitmap64.c b/src/lib/ndpi_bitmap64.c
index 75351af9a..ae34a2704 100644
--- a/src/lib/ndpi_bitmap64.c
+++ b/src/lib/ndpi_bitmap64.c
@@ -1,11 +1,8 @@
/*
- * ndpi_bitmap.c
+ * ndpi_bitmap64.c
*
* Copyright (C) 2011-23 - ntop.org and contributors
*
- * This file is part of nDPI, an open source deep packet inspection
- * library based on the OpenDPI and PACE technology by ipoque GmbH
- *
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
@@ -27,53 +24,149 @@
#include <math.h>
#include <sys/types.h>
-#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN
+#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN
#include "ndpi_config.h"
#include "ndpi_api.h"
-#include "ndpi_includes.h"
-#include "ndpi_encryption.h"
-
#include "third_party/include/binaryfusefilter.h"
-/* ******************************************* */
+#define NDPI_BITMAP64_REALLOC_SIZE 4096
-ndpi_bitmap64* ndpi_bitmap64_alloc_size(u_int32_t num_items) {
- binary_fuse16_t *b = (binary_fuse16_t*)ndpi_malloc(sizeof(binary_fuse16_t));
-
- if(b == NULL) return(NULL);
+// #define PRINT_DUPLICATED_HASHS
+
+typedef struct {
+ u_int32_t num_allocated_entries, num_used_entries;
+ u_int64_t *entries;
+ bool is_compressed;
+ binary_fuse16_t bitmap;
+} ndpi_bitmap64_t;
- if(binary_fuse16_allocate(num_items, b))
- return((ndpi_bitmap64*)b);
- else {
- ndpi_free(b);
+/* ********************************************************** */
+
+ndpi_bitmap64* ndpi_bitmap64_alloc() {
+ ndpi_bitmap64_t *rc = (ndpi_bitmap64_t*)ndpi_malloc(sizeof(ndpi_bitmap64_t));
+
+ if(!rc) return(rc);
+
+ rc->num_allocated_entries = NDPI_BITMAP64_REALLOC_SIZE, rc->num_used_entries = 0;
+ if((rc->entries = (u_int64_t*)ndpi_calloc(rc->num_allocated_entries, sizeof(u_int64_t))) == NULL) {
+ ndpi_free(rc);
return(NULL);
}
+
+ rc->is_compressed = false;
+
+ return((ndpi_bitmap64*)rc);
}
-/* ******************************************* */
+/* ********************************************************** */
-void ndpi_bitmap64_free(ndpi_bitmap64* b) {
- binary_fuse16_free((binary_fuse16_t*)b);
- ndpi_free(b);
+static int ndpi_bitmap64_entry_compare(const void *_a, const void *_b) {
+ u_int64_t *a = (u_int64_t*)_a, *b = (u_int64_t*)_b;
+
+ if(*a < *b) return -1;
+ else if(*a > *b) return 1;
+ else return 0;
}
-/* ******************************************* */
+/* ********************************************************** */
+
+/* Sort and compact memory before searching */
+bool ndpi_bitmap64_compress(ndpi_bitmap64 *_b) {
+ ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+ u_int32_t i;
+
+ if(b->num_used_entries > 0) {
+ if(b->num_used_entries > 1)
+ qsort(b->entries, b->num_used_entries,
+ sizeof(u_int64_t),
+ ndpi_bitmap64_entry_compare);
+
+ /* Now remove duplicates */
+ u_int64_t old_value = b->entries[0], new_len = 1;
+
+ for(i=1; i<b->num_used_entries; i++) {
+ if(b->entries[i] != old_value) {
+ if(new_len != i)
+ memcpy(&b->entries[new_len], &b->entries[i], sizeof(u_int64_t));
+
+ old_value = b->entries[i];
+ new_len++;
+ } else {
+#ifdef PRINT_DUPLICATED_HASHS
+ printf("Skipping duplicate hash %lluu [id: %u/%u]\n",
+ b->entries[i].value, i, b->num_used_entries);
+#endif
+ }
+ }
+
+ b->num_used_entries = b->num_allocated_entries = new_len;
+ }
-void ndpi_bitmap64_set(ndpi_bitmap64* b, u_int64_t value) {
- binary_fuse16_populate(&value, 1, (binary_fuse16_t*)b);
+ if(binary_fuse16_allocate(b->num_used_entries, &b->bitmap)) {
+ if(binary_fuse16_populate(b->entries, b->num_used_entries, &b->bitmap)) {
+ ndpi_free(b->entries), b->num_used_entries = b->num_allocated_entries = 0;
+ b->entries = NULL;
+ } else
+ return(false);
+ } else
+ return(false);
+
+ b->is_compressed = true;
+
+ return(true);
}
-/* ******************************************* */
+/* ********************************************************** */
+
+bool ndpi_bitmap64_set(ndpi_bitmap64 *_b, u_int64_t value) {
+ ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+
+ if(b->num_used_entries >= b->num_allocated_entries) {
+ u_int64_t *rc;
+ u_int32_t new_len = b->num_allocated_entries + NDPI_BITMAP64_REALLOC_SIZE;
+
+ rc = (u_int64_t*)ndpi_realloc(b->entries,
+ sizeof(u_int64_t)*b->num_allocated_entries,
+ sizeof(u_int64_t)*new_len);
+ if(rc == NULL) return(false);
-bool ndpi_bitmap64_isset(ndpi_bitmap64* b, u_int64_t value) {
- return(binary_fuse16_contain(value, (binary_fuse16_t*)b));
+ b->entries = rc, b->num_allocated_entries = new_len;
+ }
+
+ b->entries[b->num_used_entries] = value;
+ b->num_used_entries++, b->is_compressed = false;
+
+ return(true);
+}
+
+/* ********************************************************** */
+
+bool ndpi_bitmap64_isset(ndpi_bitmap64 *_b, u_int64_t value) {
+ ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+
+ if(!b->is_compressed) ndpi_bitmap64_compress(b);
+
+ return(binary_fuse16_contain(value, &b->bitmap));
}
-/* ******************************************* */
+/* ********************************************************** */
+
+void ndpi_bitmap64_free(ndpi_bitmap64 *_b) {
+ ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+
+ if(b->entries) ndpi_free(b->entries);
-u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *b) {
- return(binary_fuse16_size_in_bytes((binary_fuse16_t*)b));
+ if(b->is_compressed)
+ binary_fuse16_free(&b->bitmap);
+
+ ndpi_free(b);
}
+/* ********************************************************** */
+u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *_b) {
+ ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b;
+
+ return(sizeof(ndpi_bitmap64) + binary_fuse16_size_in_bytes(&b->bitmap));
+}
diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc
index 9bb40ae7c..6fe13e852 100644
--- a/src/lib/ndpi_content_match.c.inc
+++ b/src/lib/ndpi_content_match.c.inc
@@ -1215,6 +1215,7 @@ static ndpi_protocol_match host_match[] =
{ "senderscore.com", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL },
{ "ixhash.net", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL },
{ "esvarbl.com", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL },
+ { "abuse.ch", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL },
{ ".dnsbl.", /* www.dnsbl.info */ "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL },
{ "iqiyi.com", "PPStream", NDPI_PROTOCOL_PPSTREAM, NDPI_PROTOCOL_CATEGORY_STREAMING, NDPI_PROTOCOL_FUN, NDPI_PROTOCOL_DEFAULT_LEVEL },
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index 904a716ac..2b2e5b6f6 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -32,389 +32,20 @@
#define DEBUG_CONTAINS
#endif
-//#define USE_BINARY_BITMAP
-
-#ifdef USE_BINARY_BITMAP
-
-/* ********************************************************** */
-/* ********************************************************** */
-
-/* Faster but it uses more memory */
-
-void ndpi_domain_classify_free(ndpi_domain_classify *search) {
- ndpi_binary_bitmap_free(search->bitmap);
- ndpi_free(search);
-}
-
-/* ********************************************************** */
-
ndpi_domain_classify* ndpi_domain_classify_alloc() {
- ndpi_domain_classify *search = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify));
-
- if(!search) return(NULL);
-
- if((search->bitmap = ndpi_binary_bitmap_alloc()) == NULL)
- goto toobad;
-
- return(search);
-
- toobad:
- ndpi_domain_classify_free(search);
- return(NULL);
-}
-
-/* ********************************************************** */
-
-u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *c) {
- return(sizeof(ndpi_domain_classify)+ndpi_binary_bitmap_size(c->bitmap));
-}
-
-/* ********************************************************** */
-
-bool ndpi_domain_classify_add(ndpi_domain_classify *c,
- u_int8_t class_id,
- char *domain) {
- u_int64_t hash;
- char *dot = strrchr(domain, '.');
-
- if(!dot) return(false);
- if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
- return(false);
-
- /* Skip heading dots */
- while(domain[0] == '.') domain++;
-
- hash = ndpi_quick_hash64(domain, strlen(domain));
-
-#ifdef DEBUG_ADD
- printf("[add] %s @ %u [hash: %llu]\n", domain, class_id, hash);
-
-#if 0
- if(ndpi_binary_bitmap_isset(c->bitmap, hash, &class_id))
- printf("[add] False positive %s @ %u [hash: %llu]\n", domain, class_id, hash);
-#endif
-#endif
-
- return(ndpi_binary_bitmap_set(c->bitmap, hash, class_id));
-}
-
-/* ********************************************************** */
-
-u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_c,
- u_int8_t class_id,
- char *file_path) {
- u_int32_t num_added = 0;
- char buf[256];
- FILE *fd;
- char *line;
-
- fd = fopen(file_path, "r");
- if(fd == NULL)
- return(false);
-
- while((line = fgets(buf, sizeof(buf), fd)) != NULL) {
- u_int len;
-
- if((line[0] == '#') || (line[0] == '\0'))
- continue;
- else {
- len = strlen(line) - 1;
-
- if(len == 0)
- continue;
- else
- line[len] = '\0';
- }
-
- if(ndpi_domain_classify_add(_c, class_id, line))
- num_added++;
- }
-
- fclose(fd);
-
- return(num_added);
-}
-
-/* ********************************************************** */
-
-static bool is_valid_domain_char(u_char c) {
- if(((c >= 'A')&& (c <= 'Z'))
- || ((c >= 'a')&& (c <= 'z'))
- || ((c >= '0')&& (c <= '9'))
- || (c == '_')
- || (c == '-')
- || (c == '.'))
- return(true);
- else
- return(false);
-}
-
-/* ********************************************************** */
-
-bool ndpi_domain_classify_contains(ndpi_domain_classify *c,
- u_int8_t *class_id /* out */,
- char *domain) {
- u_int32_t len;
- char *dot, *elem, *last_dot;
-
- if(!domain) return(false);
- if((len = strlen(domain)) == 0) return(false);
- if((dot = strrchr(domain, '.')) == NULL) return(false);
- if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false);
-
- /* This is a number or a numeric IP or similar */
- if(isdigit(domain[len-1]) && isdigit(domain[0])) {
-#ifdef DEBUG_CONTAINS
- printf("[contains] %s INVALID\n", domain);
-#endif
-
- return(false);
- }
-
- if(!is_valid_domain_char(domain[0])) {
-#ifdef DEBUG_CONTAINS
- printf("[contains] %s INVALID\n", domain);
-#endif
-
- return(false);
- }
-
- elem = domain, last_dot = strrchr(domain, '.');
-
- while(true) {
- u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem));
-
-#ifdef DEBUG_CONTAINS
- printf("[contains] Searching %s [hash: %llu]\n", elem, hash);
-#endif
-
- if(ndpi_binary_bitmap_isset(c->bitmap, hash, class_id)) {
-#ifdef DEBUG_CONTAINS
- printf("[contains] %s = %d\n", elem, *class_id);
-#endif
- return(true);
- }
-
- if((elem = strchr(elem, '.')) == NULL)
- break;
- else {
- if(elem == last_dot)
- break;
- else
- elem = &elem[1];
- }
- }
-
-#ifdef DEBUG_CONTAINS
- printf("[contains] %s NOT FOUND\n", domain);
-#endif
-
- return(false);
-}
-
-#else /* ! USE_BINARY_BITMAP */
-
-/* ********************************************************** */
-/* ********************************************************** */
-
-#define END_OF_TOKENS_DELIMITER 0x12345678
-#define NUM_DOMAIN_BITMAPS 8
-#define NUM_DOMAIN_BITMAPS_THRESHOLD (NUM_DOMAIN_BITMAPS-1)
-#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 8
-
-typedef struct {
- ndpi_bitmap *bitmap[NUM_DOMAIN_BITMAPS];
-} ndpi_domain_search;
-
-typedef struct {
- u_int16_t class_id;
- ndpi_domain_search *domains;
-} ndpi_domain_classify_t;
-
-typedef struct {
- ndpi_domain_classify_t *class[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
-} ndpi_domain_classifications_t;
-
-/* ********************************************************** */
-
-static void ndpi_domain_search_free(ndpi_domain_search *search) {
- u_int16_t i;
-
- for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
- if(search->bitmap[i] == NULL)
- break;
-
- ndpi_bitmap_free(search->bitmap[i]);
- }
-
- ndpi_free(search);
-}
-
-/* ********************************************************** */
-
-static ndpi_domain_search* ndpi_domain_search_alloc() {
- ndpi_domain_search *search = (ndpi_domain_search*)ndpi_calloc(NUM_DOMAIN_BITMAPS, sizeof(ndpi_domain_search));
- u_int16_t i;
-
- if(!search) return(NULL);
-
- for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
- if((search->bitmap[i] = ndpi_bitmap_alloc()) == NULL)
- goto toobad;
- }
-
- return(search);
-
- toobad:
- ndpi_domain_search_free(search);
- return(NULL);
-}
-
-/* ********************************************************** */
-
-static u_int32_t ndpi_domain_search_size(ndpi_domain_search *search) {
- u_int32_t i, total_len = 0;
-
- for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
- char *buf;
-
- total_len += ndpi_bitmap_serialize(search->bitmap[i], &buf);
- ndpi_free(buf);
- }
-
- return(total_len);
-}
-
-/* ********************************************************** */
-
-/* NOTE: domain will be modified: copy it if necessary */
-static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) {
- char *elem;
- u_int32_t bitmap_id = 0, len, hsum = 0;
- bool quit = false;
-
- if(domain == NULL) return(false);
- if((len = strlen(domain)) == 0) return(false);
-
- len--;
- while((len > 0)
- && ((domain[len] == '.')
- || (domain[len] == '\n')
- || (domain[len] == '\r'))
- )
- domain[len--] = '\0';
-
- if(domain[0] == '.') ++domain;
-
- elem = strrchr(domain, '.');
- while(elem) {
- u_int32_t h;
-
- if(elem[0] == '.') elem = &elem[1];
-
- h = ndpi_hash_string(elem);
-
- if(elem == domain) {
- /* We're adding the beginning of the domain, hence the last token before quitting */
- h += END_OF_TOKENS_DELIMITER;
-
-#ifdef DEBUG_ADD
- if(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum))
- printf("[add] False positive while adding %s (%s) [%u][bitmap_id: %u]\n",
- elem, domain, h + hsum, bitmap_id);
-#endif
- }
-
-#ifdef DEBUG_ADD
- printf("[add] Trying to add %s [%s][%u][bitmap_id: %u]\n",
- elem, domain, h + hsum, bitmap_id);
-#endif
-
- ndpi_bitmap_set(search->bitmap[bitmap_id], h + hsum);
-
- bitmap_id++, hsum += h;
-
- if(quit)
- break;
-
- if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
- elem = domain, quit = true; /* Hash the rest of the word */
- else {
- elem[-1] = '\0';
- elem = strrchr(domain, '.');
-
- if(elem == NULL)
- elem = domain, quit = true;
- }
- }
-
- return(bitmap_id);
-}
-
-/* ********************************************************** */
-
-static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain) {
- char *elem;
- u_int32_t bitmap_id = 0, hsum = 0;
- bool quit = false;
-
- if((elem = strrchr(domain, '.')) == NULL)
- return(false); /* This does not look like a domain */
-
- while(elem) {
- u_int32_t h;
-
- if(elem[0] == '.') elem = &elem[1];
-
- h = ndpi_hash_string(elem);
-
- if(!ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) {
- /* Exact match does not work, so let's see if a partial match works instead */
-
- /* We're adding the beginning of the domain, hence the last token before quitting */
- h += END_OF_TOKENS_DELIMITER;
-
- return(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum));
- }
-
- bitmap_id++, hsum += h;
-
- if(quit)
- break;
-
- if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
- elem = domain, quit = true; /* Hash the rest of the word */
- else {
- elem[-1] = '\0';
- elem = strrchr(domain, '.');
-
- if(elem == NULL)
- elem = domain, quit = true;
- }
- }
-
- return(true);
-}
-
-/* ********************************************************** */
-/* ********************************************************** */
-
-ndpi_domain_classify* ndpi_domain_classify_alloc() {
- ndpi_domain_classify_t *cat = (ndpi_domain_classify_t*)ndpi_calloc(1, sizeof(ndpi_domain_classifications_t));
+ ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_calloc(1, sizeof(ndpi_domain_classify));
return((ndpi_domain_classify*)cat);
}
/* ********************************************************** */
-void ndpi_domain_classify_free(ndpi_domain_classify *_s) {
+void ndpi_domain_classify_free(ndpi_domain_classify *s) {
u_int32_t i;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- ndpi_domain_search_free(s->class[i]->domains);
- ndpi_free(s->class[i]);
+ if(s->classes[i].domains != NULL) {
+ ndpi_bitmap64_free(s->classes[i].domains);
} else
break;
}
@@ -424,13 +55,12 @@ void ndpi_domain_classify_free(ndpi_domain_classify *_s) {
/* ********************************************************** */
-u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) {
- u_int32_t i, tot_len = sizeof(ndpi_domain_classify_t);
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s) {
+ u_int32_t i, tot_len = sizeof(ndpi_domain_classify);
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- tot_len += ndpi_domain_search_size(s->class[i]->domains) + sizeof(ndpi_domain_classify_t);
+ if(s->classes[i].domains != NULL) {
+ tot_len += ndpi_bitmap64_size(s->classes[i].domains);
} else
break;
}
@@ -440,30 +70,22 @@ u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) {
/* ********************************************************** */
-bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
+bool ndpi_domain_classify_add(ndpi_domain_classify *s,
u_int8_t class_id,
char *domain) {
u_int32_t i;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
- char buf[256], *dot = strrchr(domain, '.');
+ char *dot = strrchr(domain, '.');
if(!dot) return(false);
if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
return(false);
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- if(s->class[i]->class_id == class_id) {
- break;
- }
- } else {
- s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));
-
- if(s->class[i] == NULL)
- return(false);
-
- s->class[i]->class_id = class_id;
- s->class[i]->domains = ndpi_domain_search_alloc();
+ if(s->classes[i].class_id == class_id) {
+ break;
+ } else if(s->classes[i].class_id == 0) {
+ s->classes[i].class_id = class_id;
+ s->classes[i].domains = ndpi_bitmap64_alloc();
break;
}
}
@@ -471,39 +93,26 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
return(false);
- snprintf(buf, sizeof(buf), "%s", domain);
-
-#ifdef DEBUG_ADD
- printf("[add] %s @ %u\n", domain, class_id);
-#endif
-
- return(ndpi_domain_search_add(s->class[i]->domains, buf));
+ return(ndpi_bitmap64_set(s->classes[i].domains,
+ ndpi_quick_hash64(domain, strlen(domain))));
}
/* ********************************************************** */
-u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
+u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
u_int8_t class_id,
char *file_path) {
u_int32_t i, num_added = 0;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
char buf[256];
FILE *fd;
char *line;
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- if(s->class[i]->class_id == class_id) {
- break;
- }
- } else {
- s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));
-
- if(s->class[i] == NULL)
- return(false);
-
- s->class[i]->class_id = class_id;
- s->class[i]->domains = ndpi_domain_search_alloc();
+ if(s->classes[i].class_id == class_id) {
+ break;
+ } else if(s->classes[i].class_id == 0) {
+ s->classes[i].class_id = class_id;
+ s->classes[i].domains = ndpi_bitmap64_alloc();
break;
}
}
@@ -531,7 +140,8 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
line[len] = '\0';
}
- if(ndpi_domain_search_add(s->class[i]->domains, line))
+ if(ndpi_bitmap64_set(s->classes[i].domains,
+ ndpi_quick_hash64(line, strlen(line))))
num_added++;
}
@@ -556,12 +166,12 @@ static bool is_valid_domain_char(u_char c) {
/* ********************************************************** */
-bool ndpi_domain_classify_contains(ndpi_domain_classify *_s,
+bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
u_int8_t *class_id /* out */,
char *domain) {
u_int32_t i, len;
- ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
- char *dot;
+ u_int64_t hash;
+ char *dot, *elem;
if(!domain) return(false);
if((len = strlen(domain)) == 0) return(false);
@@ -585,22 +195,32 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s,
return(false);
}
- for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
- if(s->class[i] != NULL) {
- char buf[256];
-
- snprintf(buf, sizeof(buf), "%s", domain);
-
- if(ndpi_domain_search_contains(s->class[i]->domains, buf)) {
+ elem = domain;
+
+ while(elem != NULL) {
+ hash = ndpi_quick_hash64(elem, strlen(elem));
+
+ for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
+ if(s->classes[i].class_id != 0) {
+ if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) {
#ifdef DEBUG_CONTAINS
- printf("[contains] %s = %d\n", domain, s->class[i]->class_id);
+ printf("[contains] %s = %d\n", domain, s->classes[i].class_id);
#endif
- *class_id = s->class[i]->class_id;
- return(true);
- }
+ *class_id = s->classes[i].class_id;
+ return(true);
+ }
+ } else
+ break;
}
- }
+ elem = strchr(elem, '.');
+
+ if((elem == NULL) || (elem == dot))
+ break;
+ else
+ elem = &elem[1];
+ } /* while */
+
#ifdef DEBUG_CONTAINS
printf("[contains] %s NOT FOUND\n", domain);
#endif
@@ -608,5 +228,3 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s,
return(false);
}
-
-#endif