diff options
author | Luca Deri <deri@ntop.org> | 2023-09-05 17:03:20 +0200 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2023-09-05 17:03:20 +0200 |
commit | 978df906b38c26a359b2d91089e70e133cc7502e (patch) | |
tree | ac44a768d718f0a3265b3d11c01ab9305adc34dd /src | |
parent | 0080d0d092e245aa865bf757e682496e97257d25 (diff) |
Improved classification further reducing memory used
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h | 14 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 7 | ||||
-rw-r--r-- | src/lib/ndpi_binary_bitmap.c | 36 | ||||
-rw-r--r-- | src/lib/ndpi_bitmap64.c | 153 | ||||
-rw-r--r-- | src/lib/ndpi_content_match.c.inc | 1 | ||||
-rw-r--r-- | src/lib/ndpi_domain_classify.c | 482 |
6 files changed, 224 insertions, 469 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index 76ff9fadd..612847da4 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -2037,12 +2037,13 @@ extern "C" { on https://github.com/FastFilter/xor_singleheader/tree/master */ - ndpi_bitmap64* ndpi_bitmap64_alloc_size(u_int32_t size); - void ndpi_bitmap64_free(ndpi_bitmap64* b); - void ndpi_bitmap64_set(ndpi_bitmap64* b, u_int64_t value); - bool ndpi_bitmap64_isset(ndpi_bitmap64* b, u_int64_t value); + ndpi_bitmap64* ndpi_bitmap64_alloc(); + bool ndpi_bitmap64_set(ndpi_bitmap64 *b, u_int64_t value); + bool ndpi_bitmap64_compress(ndpi_bitmap64 *b); + bool ndpi_bitmap64_isset(ndpi_bitmap64 *b, u_int64_t value); + void ndpi_bitmap64_free(ndpi_bitmap64 *b); u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *b); - + /* ******************************* */ /* Bloom-filter on steroids based on ndpi_bitmap @@ -2110,6 +2111,9 @@ extern "C" { void ndpi_binary_bitmap_free(ndpi_binary_bitmap *b); u_int32_t ndpi_binary_bitmap_size(ndpi_binary_bitmap *b); u_int32_t ndpi_binary_bitmap_cardinality(ndpi_binary_bitmap *b); + + /* ******************************* */ + /* ******************************* */ diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 553440a2b..53535a441 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -1199,8 +1199,13 @@ typedef struct { bool is_compressed; } ndpi_binary_bitmap; +#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 16 + typedef struct { - ndpi_binary_bitmap *bitmap; + struct { + u_int16_t class_id; + ndpi_bitmap64 *domains; + } classes[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS]; } ndpi_domain_classify; #ifdef NDPI_LIB_COMPILATION diff --git a/src/lib/ndpi_binary_bitmap.c b/src/lib/ndpi_binary_bitmap.c index 4360e574f..184bb99df 100644 --- a/src/lib/ndpi_binary_bitmap.c +++ b/src/lib/ndpi_binary_bitmap.c @@ -47,6 +47,10 @@ ndpi_binary_bitmap* ndpi_binary_bitmap_alloc() { return(NULL); } +#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY + rc->bitmap = NULL; +#endif + rc->is_compressed = false; return(rc); @@ -131,6 +135,22 @@ bool ndpi_binary_bitmap_compress(ndpi_binary_bitmap *b) { } b->is_compressed = true; + +#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY + if(b->bitmap != NULL) ndpi_bitmap64_free(b->bitmap); + b->bitmap = ndpi_bitmap64_alloc_size(b->num_used_entries); + + u_int64_t *values = (u_int64_t*)ndpi_malloc(sizeof(u_int64_t)*b->num_used_entries); + + if(values) { + for(i=0; i<b->num_used_entries; i++) + values[i] = b->entries[i].value; + + ndpi_bitmap64_multiset(b->bitmap, values, b->num_used_entries); + ndpi_free(values); + } +#endif + return(true); } @@ -140,10 +160,14 @@ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t * if(!b->is_compressed) ndpi_binary_bitmap_compress(b); if(b->num_used_entries > 0) { +#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY + return(ndpi_bitmap64_isset(b->bitmap, value)); +#else struct ndpi_binary_bitmap_entry *rc; struct ndpi_binary_bitmap_entry tofind; - tofind.value = value; rc = (struct ndpi_binary_bitmap_entry*)bsearch(&tofind, b->entries, + tofind.value = value; + rc = (struct ndpi_binary_bitmap_entry*)bsearch(&tofind, b->entries, b->num_used_entries, sizeof(struct ndpi_binary_bitmap_entry), ndpi_binary_bitmap_entry_compare); @@ -151,6 +175,7 @@ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t * *out_category = rc->category; return(rc == NULL ? false : true); +#endif } else return(false); } @@ -159,13 +184,22 @@ bool ndpi_binary_bitmap_isset(ndpi_binary_bitmap *b, u_int64_t value, u_int8_t * void ndpi_binary_bitmap_free(ndpi_binary_bitmap *b) { ndpi_free(b->entries); + +#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY + if(b->bitmap != NULL) ndpi_bitmap64_free(b->bitmap); +#endif + ndpi_free(b); } /* ********************************************************** */ u_int32_t ndpi_binary_bitmap_size(ndpi_binary_bitmap *b) { +#ifdef USE_BITMAP64_BINARY_BITMAP_MEMORY + return(sizeof(ndpi_binary_bitmap) + ndpi_bitmap64_size(b->bitmap)); +#else return(sizeof(ndpi_binary_bitmap) + b->num_used_entries * sizeof(struct ndpi_binary_bitmap_entry)); +#endif } /* ********************************************************** */ diff --git a/src/lib/ndpi_bitmap64.c b/src/lib/ndpi_bitmap64.c index 75351af9a..ae34a2704 100644 --- a/src/lib/ndpi_bitmap64.c +++ b/src/lib/ndpi_bitmap64.c @@ -1,11 +1,8 @@ /* - * ndpi_bitmap.c + * ndpi_bitmap64.c * * Copyright (C) 2011-23 - ntop.org and contributors * - * This file is part of nDPI, an open source deep packet inspection - * library based on the OpenDPI and PACE technology by ipoque GmbH - * * nDPI is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -27,53 +24,149 @@ #include <math.h> #include <sys/types.h> -#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN +#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN #include "ndpi_config.h" #include "ndpi_api.h" -#include "ndpi_includes.h" -#include "ndpi_encryption.h" - #include "third_party/include/binaryfusefilter.h" -/* ******************************************* */ +#define NDPI_BITMAP64_REALLOC_SIZE 4096 -ndpi_bitmap64* ndpi_bitmap64_alloc_size(u_int32_t num_items) { - binary_fuse16_t *b = (binary_fuse16_t*)ndpi_malloc(sizeof(binary_fuse16_t)); - - if(b == NULL) return(NULL); +// #define PRINT_DUPLICATED_HASHS + +typedef struct { + u_int32_t num_allocated_entries, num_used_entries; + u_int64_t *entries; + bool is_compressed; + binary_fuse16_t bitmap; +} ndpi_bitmap64_t; - if(binary_fuse16_allocate(num_items, b)) - return((ndpi_bitmap64*)b); - else { - ndpi_free(b); +/* ********************************************************** */ + +ndpi_bitmap64* ndpi_bitmap64_alloc() { + ndpi_bitmap64_t *rc = (ndpi_bitmap64_t*)ndpi_malloc(sizeof(ndpi_bitmap64_t)); + + if(!rc) return(rc); + + rc->num_allocated_entries = NDPI_BITMAP64_REALLOC_SIZE, rc->num_used_entries = 0; + if((rc->entries = (u_int64_t*)ndpi_calloc(rc->num_allocated_entries, sizeof(u_int64_t))) == NULL) { + ndpi_free(rc); return(NULL); } + + rc->is_compressed = false; + + return((ndpi_bitmap64*)rc); } -/* ******************************************* */ +/* ********************************************************** */ -void ndpi_bitmap64_free(ndpi_bitmap64* b) { - binary_fuse16_free((binary_fuse16_t*)b); - ndpi_free(b); +static int ndpi_bitmap64_entry_compare(const void *_a, const void *_b) { + u_int64_t *a = (u_int64_t*)_a, *b = (u_int64_t*)_b; + + if(*a < *b) return -1; + else if(*a > *b) return 1; + else return 0; } -/* ******************************************* */ +/* ********************************************************** */ + +/* Sort and compact memory before searching */ +bool ndpi_bitmap64_compress(ndpi_bitmap64 *_b) { + ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b; + u_int32_t i; + + if(b->num_used_entries > 0) { + if(b->num_used_entries > 1) + qsort(b->entries, b->num_used_entries, + sizeof(u_int64_t), + ndpi_bitmap64_entry_compare); + + /* Now remove duplicates */ + u_int64_t old_value = b->entries[0], new_len = 1; + + for(i=1; i<b->num_used_entries; i++) { + if(b->entries[i] != old_value) { + if(new_len != i) + memcpy(&b->entries[new_len], &b->entries[i], sizeof(u_int64_t)); + + old_value = b->entries[i]; + new_len++; + } else { +#ifdef PRINT_DUPLICATED_HASHS + printf("Skipping duplicate hash %lluu [id: %u/%u]\n", + b->entries[i].value, i, b->num_used_entries); +#endif + } + } + + b->num_used_entries = b->num_allocated_entries = new_len; + } -void ndpi_bitmap64_set(ndpi_bitmap64* b, u_int64_t value) { - binary_fuse16_populate(&value, 1, (binary_fuse16_t*)b); + if(binary_fuse16_allocate(b->num_used_entries, &b->bitmap)) { + if(binary_fuse16_populate(b->entries, b->num_used_entries, &b->bitmap)) { + ndpi_free(b->entries), b->num_used_entries = b->num_allocated_entries = 0; + b->entries = NULL; + } else + return(false); + } else + return(false); + + b->is_compressed = true; + + return(true); } -/* ******************************************* */ +/* ********************************************************** */ + +bool ndpi_bitmap64_set(ndpi_bitmap64 *_b, u_int64_t value) { + ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b; + + if(b->num_used_entries >= b->num_allocated_entries) { + u_int64_t *rc; + u_int32_t new_len = b->num_allocated_entries + NDPI_BITMAP64_REALLOC_SIZE; + + rc = (u_int64_t*)ndpi_realloc(b->entries, + sizeof(u_int64_t)*b->num_allocated_entries, + sizeof(u_int64_t)*new_len); + if(rc == NULL) return(false); -bool ndpi_bitmap64_isset(ndpi_bitmap64* b, u_int64_t value) { - return(binary_fuse16_contain(value, (binary_fuse16_t*)b)); + b->entries = rc, b->num_allocated_entries = new_len; + } + + b->entries[b->num_used_entries] = value; + b->num_used_entries++, b->is_compressed = false; + + return(true); +} + +/* ********************************************************** */ + +bool ndpi_bitmap64_isset(ndpi_bitmap64 *_b, u_int64_t value) { + ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b; + + if(!b->is_compressed) ndpi_bitmap64_compress(b); + + return(binary_fuse16_contain(value, &b->bitmap)); } -/* ******************************************* */ +/* ********************************************************** */ + +void ndpi_bitmap64_free(ndpi_bitmap64 *_b) { + ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b; + + if(b->entries) ndpi_free(b->entries); -u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *b) { - return(binary_fuse16_size_in_bytes((binary_fuse16_t*)b)); + if(b->is_compressed) + binary_fuse16_free(&b->bitmap); + + ndpi_free(b); } +/* ********************************************************** */ +u_int32_t ndpi_bitmap64_size(ndpi_bitmap64 *_b) { + ndpi_bitmap64_t *b = (ndpi_bitmap64_t*)_b; + + return(sizeof(ndpi_bitmap64) + binary_fuse16_size_in_bytes(&b->bitmap)); +} diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc index 9bb40ae7c..6fe13e852 100644 --- a/src/lib/ndpi_content_match.c.inc +++ b/src/lib/ndpi_content_match.c.inc @@ -1215,6 +1215,7 @@ static ndpi_protocol_match host_match[] = { "senderscore.com", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL }, { "ixhash.net", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL }, { "esvarbl.com", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL }, + { "abuse.ch", "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL }, { ".dnsbl.", /* www.dnsbl.info */ "Cybersec", NDPI_PROTOCOL_CYBERSECURITY, NDPI_PROTOCOL_CATEGORY_CYBERSECURITY, NDPI_PROTOCOL_SAFE, NDPI_PROTOCOL_DEFAULT_LEVEL }, { "iqiyi.com", "PPStream", NDPI_PROTOCOL_PPSTREAM, NDPI_PROTOCOL_CATEGORY_STREAMING, NDPI_PROTOCOL_FUN, NDPI_PROTOCOL_DEFAULT_LEVEL }, diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c index 904a716ac..2b2e5b6f6 100644 --- a/src/lib/ndpi_domain_classify.c +++ b/src/lib/ndpi_domain_classify.c @@ -32,389 +32,20 @@ #define DEBUG_CONTAINS #endif -//#define USE_BINARY_BITMAP - -#ifdef USE_BINARY_BITMAP - -/* ********************************************************** */ -/* ********************************************************** */ - -/* Faster but it uses more memory */ - -void ndpi_domain_classify_free(ndpi_domain_classify *search) { - ndpi_binary_bitmap_free(search->bitmap); - ndpi_free(search); -} - -/* ********************************************************** */ - ndpi_domain_classify* ndpi_domain_classify_alloc() { - ndpi_domain_classify *search = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify)); - - if(!search) return(NULL); - - if((search->bitmap = ndpi_binary_bitmap_alloc()) == NULL) - goto toobad; - - return(search); - - toobad: - ndpi_domain_classify_free(search); - return(NULL); -} - -/* ********************************************************** */ - -u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *c) { - return(sizeof(ndpi_domain_classify)+ndpi_binary_bitmap_size(c->bitmap)); -} - -/* ********************************************************** */ - -bool ndpi_domain_classify_add(ndpi_domain_classify *c, - u_int8_t class_id, - char *domain) { - u_int64_t hash; - char *dot = strrchr(domain, '.'); - - if(!dot) return(false); - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) - return(false); - - /* Skip heading dots */ - while(domain[0] == '.') domain++; - - hash = ndpi_quick_hash64(domain, strlen(domain)); - -#ifdef DEBUG_ADD - printf("[add] %s @ %u [hash: %llu]\n", domain, class_id, hash); - -#if 0 - if(ndpi_binary_bitmap_isset(c->bitmap, hash, &class_id)) - printf("[add] False positive %s @ %u [hash: %llu]\n", domain, class_id, hash); -#endif -#endif - - return(ndpi_binary_bitmap_set(c->bitmap, hash, class_id)); -} - -/* ********************************************************** */ - -u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_c, - u_int8_t class_id, - char *file_path) { - u_int32_t num_added = 0; - char buf[256]; - FILE *fd; - char *line; - - fd = fopen(file_path, "r"); - if(fd == NULL) - return(false); - - while((line = fgets(buf, sizeof(buf), fd)) != NULL) { - u_int len; - - if((line[0] == '#') || (line[0] == '\0')) - continue; - else { - len = strlen(line) - 1; - - if(len == 0) - continue; - else - line[len] = '\0'; - } - - if(ndpi_domain_classify_add(_c, class_id, line)) - num_added++; - } - - fclose(fd); - - return(num_added); -} - -/* ********************************************************** */ - -static bool is_valid_domain_char(u_char c) { - if(((c >= 'A')&& (c <= 'Z')) - || ((c >= 'a')&& (c <= 'z')) - || ((c >= '0')&& (c <= '9')) - || (c == '_') - || (c == '-') - || (c == '.')) - return(true); - else - return(false); -} - -/* ********************************************************** */ - -bool ndpi_domain_classify_contains(ndpi_domain_classify *c, - u_int8_t *class_id /* out */, - char *domain) { - u_int32_t len; - char *dot, *elem, *last_dot; - - if(!domain) return(false); - if((len = strlen(domain)) == 0) return(false); - if((dot = strrchr(domain, '.')) == NULL) return(false); - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); - - /* This is a number or a numeric IP or similar */ - if(isdigit(domain[len-1]) && isdigit(domain[0])) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", domain); -#endif - - return(false); - } - - if(!is_valid_domain_char(domain[0])) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", domain); -#endif - - return(false); - } - - elem = domain, last_dot = strrchr(domain, '.'); - - while(true) { - u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem)); - -#ifdef DEBUG_CONTAINS - printf("[contains] Searching %s [hash: %llu]\n", elem, hash); -#endif - - if(ndpi_binary_bitmap_isset(c->bitmap, hash, class_id)) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s = %d\n", elem, *class_id); -#endif - return(true); - } - - if((elem = strchr(elem, '.')) == NULL) - break; - else { - if(elem == last_dot) - break; - else - elem = &elem[1]; - } - } - -#ifdef DEBUG_CONTAINS - printf("[contains] %s NOT FOUND\n", domain); -#endif - - return(false); -} - -#else /* ! USE_BINARY_BITMAP */ - -/* ********************************************************** */ -/* ********************************************************** */ - -#define END_OF_TOKENS_DELIMITER 0x12345678 -#define NUM_DOMAIN_BITMAPS 8 -#define NUM_DOMAIN_BITMAPS_THRESHOLD (NUM_DOMAIN_BITMAPS-1) -#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 8 - -typedef struct { - ndpi_bitmap *bitmap[NUM_DOMAIN_BITMAPS]; -} ndpi_domain_search; - -typedef struct { - u_int16_t class_id; - ndpi_domain_search *domains; -} ndpi_domain_classify_t; - -typedef struct { - ndpi_domain_classify_t *class[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS]; -} ndpi_domain_classifications_t; - -/* ********************************************************** */ - -static void ndpi_domain_search_free(ndpi_domain_search *search) { - u_int16_t i; - - for(i=0; i<NUM_DOMAIN_BITMAPS; i++) { - if(search->bitmap[i] == NULL) - break; - - ndpi_bitmap_free(search->bitmap[i]); - } - - ndpi_free(search); -} - -/* ********************************************************** */ - -static ndpi_domain_search* ndpi_domain_search_alloc() { - ndpi_domain_search *search = (ndpi_domain_search*)ndpi_calloc(NUM_DOMAIN_BITMAPS, sizeof(ndpi_domain_search)); - u_int16_t i; - - if(!search) return(NULL); - - for(i=0; i<NUM_DOMAIN_BITMAPS; i++) { - if((search->bitmap[i] = ndpi_bitmap_alloc()) == NULL) - goto toobad; - } - - return(search); - - toobad: - ndpi_domain_search_free(search); - return(NULL); -} - -/* ********************************************************** */ - -static u_int32_t ndpi_domain_search_size(ndpi_domain_search *search) { - u_int32_t i, total_len = 0; - - for(i=0; i<NUM_DOMAIN_BITMAPS; i++) { - char *buf; - - total_len += ndpi_bitmap_serialize(search->bitmap[i], &buf); - ndpi_free(buf); - } - - return(total_len); -} - -/* ********************************************************** */ - -/* NOTE: domain will be modified: copy it if necessary */ -static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) { - char *elem; - u_int32_t bitmap_id = 0, len, hsum = 0; - bool quit = false; - - if(domain == NULL) return(false); - if((len = strlen(domain)) == 0) return(false); - - len--; - while((len > 0) - && ((domain[len] == '.') - || (domain[len] == '\n') - || (domain[len] == '\r')) - ) - domain[len--] = '\0'; - - if(domain[0] == '.') ++domain; - - elem = strrchr(domain, '.'); - while(elem) { - u_int32_t h; - - if(elem[0] == '.') elem = &elem[1]; - - h = ndpi_hash_string(elem); - - if(elem == domain) { - /* We're adding the beginning of the domain, hence the last token before quitting */ - h += END_OF_TOKENS_DELIMITER; - -#ifdef DEBUG_ADD - if(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) - printf("[add] False positive while adding %s (%s) [%u][bitmap_id: %u]\n", - elem, domain, h + hsum, bitmap_id); -#endif - } - -#ifdef DEBUG_ADD - printf("[add] Trying to add %s [%s][%u][bitmap_id: %u]\n", - elem, domain, h + hsum, bitmap_id); -#endif - - ndpi_bitmap_set(search->bitmap[bitmap_id], h + hsum); - - bitmap_id++, hsum += h; - - if(quit) - break; - - if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD) - elem = domain, quit = true; /* Hash the rest of the word */ - else { - elem[-1] = '\0'; - elem = strrchr(domain, '.'); - - if(elem == NULL) - elem = domain, quit = true; - } - } - - return(bitmap_id); -} - -/* ********************************************************** */ - -static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain) { - char *elem; - u_int32_t bitmap_id = 0, hsum = 0; - bool quit = false; - - if((elem = strrchr(domain, '.')) == NULL) - return(false); /* This does not look like a domain */ - - while(elem) { - u_int32_t h; - - if(elem[0] == '.') elem = &elem[1]; - - h = ndpi_hash_string(elem); - - if(!ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) { - /* Exact match does not work, so let's see if a partial match works instead */ - - /* We're adding the beginning of the domain, hence the last token before quitting */ - h += END_OF_TOKENS_DELIMITER; - - return(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)); - } - - bitmap_id++, hsum += h; - - if(quit) - break; - - if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD) - elem = domain, quit = true; /* Hash the rest of the word */ - else { - elem[-1] = '\0'; - elem = strrchr(domain, '.'); - - if(elem == NULL) - elem = domain, quit = true; - } - } - - return(true); -} - -/* ********************************************************** */ -/* ********************************************************** */ - -ndpi_domain_classify* ndpi_domain_classify_alloc() { - ndpi_domain_classify_t *cat = (ndpi_domain_classify_t*)ndpi_calloc(1, sizeof(ndpi_domain_classifications_t)); + ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_calloc(1, sizeof(ndpi_domain_classify)); return((ndpi_domain_classify*)cat); } /* ********************************************************** */ -void ndpi_domain_classify_free(ndpi_domain_classify *_s) { +void ndpi_domain_classify_free(ndpi_domain_classify *s) { u_int32_t i; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - ndpi_domain_search_free(s->class[i]->domains); - ndpi_free(s->class[i]); + if(s->classes[i].domains != NULL) { + ndpi_bitmap64_free(s->classes[i].domains); } else break; } @@ -424,13 +55,12 @@ void ndpi_domain_classify_free(ndpi_domain_classify *_s) { /* ********************************************************** */ -u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) { - u_int32_t i, tot_len = sizeof(ndpi_domain_classify_t); - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; +u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s) { + u_int32_t i, tot_len = sizeof(ndpi_domain_classify); for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - tot_len += ndpi_domain_search_size(s->class[i]->domains) + sizeof(ndpi_domain_classify_t); + if(s->classes[i].domains != NULL) { + tot_len += ndpi_bitmap64_size(s->classes[i].domains); } else break; } @@ -440,30 +70,22 @@ u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) { /* ********************************************************** */ -bool ndpi_domain_classify_add(ndpi_domain_classify *_s, +bool ndpi_domain_classify_add(ndpi_domain_classify *s, u_int8_t class_id, char *domain) { u_int32_t i; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; - char buf[256], *dot = strrchr(domain, '.'); + char *dot = strrchr(domain, '.'); if(!dot) return(false); if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - if(s->class[i]->class_id == class_id) { - break; - } - } else { - s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t)); - - if(s->class[i] == NULL) - return(false); - - s->class[i]->class_id = class_id; - s->class[i]->domains = ndpi_domain_search_alloc(); + if(s->classes[i].class_id == class_id) { + break; + } else if(s->classes[i].class_id == 0) { + s->classes[i].class_id = class_id; + s->classes[i].domains = ndpi_bitmap64_alloc(); break; } } @@ -471,39 +93,26 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *_s, if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS) return(false); - snprintf(buf, sizeof(buf), "%s", domain); - -#ifdef DEBUG_ADD - printf("[add] %s @ %u\n", domain, class_id); -#endif - - return(ndpi_domain_search_add(s->class[i]->domains, buf)); + return(ndpi_bitmap64_set(s->classes[i].domains, + ndpi_quick_hash64(domain, strlen(domain)))); } /* ********************************************************** */ -u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s, +u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, u_int8_t class_id, char *file_path) { u_int32_t i, num_added = 0; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; char buf[256]; FILE *fd; char *line; for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - if(s->class[i]->class_id == class_id) { - break; - } - } else { - s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t)); - - if(s->class[i] == NULL) - return(false); - - s->class[i]->class_id = class_id; - s->class[i]->domains = ndpi_domain_search_alloc(); + if(s->classes[i].class_id == class_id) { + break; + } else if(s->classes[i].class_id == 0) { + s->classes[i].class_id = class_id; + s->classes[i].domains = ndpi_bitmap64_alloc(); break; } } @@ -531,7 +140,8 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s, line[len] = '\0'; } - if(ndpi_domain_search_add(s->class[i]->domains, line)) + if(ndpi_bitmap64_set(s->classes[i].domains, + ndpi_quick_hash64(line, strlen(line)))) num_added++; } @@ -556,12 +166,12 @@ static bool is_valid_domain_char(u_char c) { /* ********************************************************** */ -bool ndpi_domain_classify_contains(ndpi_domain_classify *_s, +bool ndpi_domain_classify_contains(ndpi_domain_classify *s, u_int8_t *class_id /* out */, char *domain) { u_int32_t i, len; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; - char *dot; + u_int64_t hash; + char *dot, *elem; if(!domain) return(false); if((len = strlen(domain)) == 0) return(false); @@ -585,22 +195,32 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s, return(false); } - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - char buf[256]; - - snprintf(buf, sizeof(buf), "%s", domain); - - if(ndpi_domain_search_contains(s->class[i]->domains, buf)) { + elem = domain; + + while(elem != NULL) { + hash = ndpi_quick_hash64(elem, strlen(elem)); + + for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { + if(s->classes[i].class_id != 0) { + if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) { #ifdef DEBUG_CONTAINS - printf("[contains] %s = %d\n", domain, s->class[i]->class_id); + printf("[contains] %s = %d\n", domain, s->classes[i].class_id); #endif - *class_id = s->class[i]->class_id; - return(true); - } + *class_id = s->classes[i].class_id; + return(true); + } + } else + break; } - } + elem = strchr(elem, '.'); + + if((elem == NULL) || (elem == dot)) + break; + else + elem = &elem[1]; + } /* while */ + #ifdef DEBUG_CONTAINS printf("[contains] %s NOT FOUND\n", domain); #endif @@ -608,5 +228,3 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s, return(false); } - -#endif |