diff options
Diffstat (limited to 'src/lib/ndpi_domain_classify.c')
-rw-r--r-- | src/lib/ndpi_domain_classify.c | 482 |
1 files changed, 50 insertions, 432 deletions
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c index 904a716ac..2b2e5b6f6 100644 --- a/src/lib/ndpi_domain_classify.c +++ b/src/lib/ndpi_domain_classify.c @@ -32,389 +32,20 @@ #define DEBUG_CONTAINS #endif -//#define USE_BINARY_BITMAP - -#ifdef USE_BINARY_BITMAP - -/* ********************************************************** */ -/* ********************************************************** */ - -/* Faster but it uses more memory */ - -void ndpi_domain_classify_free(ndpi_domain_classify *search) { - ndpi_binary_bitmap_free(search->bitmap); - ndpi_free(search); -} - -/* ********************************************************** */ - ndpi_domain_classify* ndpi_domain_classify_alloc() { - ndpi_domain_classify *search = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify)); - - if(!search) return(NULL); - - if((search->bitmap = ndpi_binary_bitmap_alloc()) == NULL) - goto toobad; - - return(search); - - toobad: - ndpi_domain_classify_free(search); - return(NULL); -} - -/* ********************************************************** */ - -u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *c) { - return(sizeof(ndpi_domain_classify)+ndpi_binary_bitmap_size(c->bitmap)); -} - -/* ********************************************************** */ - -bool ndpi_domain_classify_add(ndpi_domain_classify *c, - u_int8_t class_id, - char *domain) { - u_int64_t hash; - char *dot = strrchr(domain, '.'); - - if(!dot) return(false); - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) - return(false); - - /* Skip heading dots */ - while(domain[0] == '.') domain++; - - hash = ndpi_quick_hash64(domain, strlen(domain)); - -#ifdef DEBUG_ADD - printf("[add] %s @ %u [hash: %llu]\n", domain, class_id, hash); - -#if 0 - if(ndpi_binary_bitmap_isset(c->bitmap, hash, &class_id)) - printf("[add] False positive %s @ %u [hash: %llu]\n", domain, class_id, hash); -#endif -#endif - - return(ndpi_binary_bitmap_set(c->bitmap, hash, class_id)); -} - -/* ********************************************************** */ - -u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_c, - u_int8_t class_id, - char *file_path) { - u_int32_t num_added = 0; - char buf[256]; - FILE *fd; - char *line; - - fd = fopen(file_path, "r"); - if(fd == NULL) - return(false); - - while((line = fgets(buf, sizeof(buf), fd)) != NULL) { - u_int len; - - if((line[0] == '#') || (line[0] == '\0')) - continue; - else { - len = strlen(line) - 1; - - if(len == 0) - continue; - else - line[len] = '\0'; - } - - if(ndpi_domain_classify_add(_c, class_id, line)) - num_added++; - } - - fclose(fd); - - return(num_added); -} - -/* ********************************************************** */ - -static bool is_valid_domain_char(u_char c) { - if(((c >= 'A')&& (c <= 'Z')) - || ((c >= 'a')&& (c <= 'z')) - || ((c >= '0')&& (c <= '9')) - || (c == '_') - || (c == '-') - || (c == '.')) - return(true); - else - return(false); -} - -/* ********************************************************** */ - -bool ndpi_domain_classify_contains(ndpi_domain_classify *c, - u_int8_t *class_id /* out */, - char *domain) { - u_int32_t len; - char *dot, *elem, *last_dot; - - if(!domain) return(false); - if((len = strlen(domain)) == 0) return(false); - if((dot = strrchr(domain, '.')) == NULL) return(false); - if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); - - /* This is a number or a numeric IP or similar */ - if(isdigit(domain[len-1]) && isdigit(domain[0])) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", domain); -#endif - - return(false); - } - - if(!is_valid_domain_char(domain[0])) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s INVALID\n", domain); -#endif - - return(false); - } - - elem = domain, last_dot = strrchr(domain, '.'); - - while(true) { - u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem)); - -#ifdef DEBUG_CONTAINS - printf("[contains] Searching %s [hash: %llu]\n", elem, hash); -#endif - - if(ndpi_binary_bitmap_isset(c->bitmap, hash, class_id)) { -#ifdef DEBUG_CONTAINS - printf("[contains] %s = %d\n", elem, *class_id); -#endif - return(true); - } - - if((elem = strchr(elem, '.')) == NULL) - break; - else { - if(elem == last_dot) - break; - else - elem = &elem[1]; - } - } - -#ifdef DEBUG_CONTAINS - printf("[contains] %s NOT FOUND\n", domain); -#endif - - return(false); -} - -#else /* ! USE_BINARY_BITMAP */ - -/* ********************************************************** */ -/* ********************************************************** */ - -#define END_OF_TOKENS_DELIMITER 0x12345678 -#define NUM_DOMAIN_BITMAPS 8 -#define NUM_DOMAIN_BITMAPS_THRESHOLD (NUM_DOMAIN_BITMAPS-1) -#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 8 - -typedef struct { - ndpi_bitmap *bitmap[NUM_DOMAIN_BITMAPS]; -} ndpi_domain_search; - -typedef struct { - u_int16_t class_id; - ndpi_domain_search *domains; -} ndpi_domain_classify_t; - -typedef struct { - ndpi_domain_classify_t *class[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS]; -} ndpi_domain_classifications_t; - -/* ********************************************************** */ - -static void ndpi_domain_search_free(ndpi_domain_search *search) { - u_int16_t i; - - for(i=0; i<NUM_DOMAIN_BITMAPS; i++) { - if(search->bitmap[i] == NULL) - break; - - ndpi_bitmap_free(search->bitmap[i]); - } - - ndpi_free(search); -} - -/* ********************************************************** */ - -static ndpi_domain_search* ndpi_domain_search_alloc() { - ndpi_domain_search *search = (ndpi_domain_search*)ndpi_calloc(NUM_DOMAIN_BITMAPS, sizeof(ndpi_domain_search)); - u_int16_t i; - - if(!search) return(NULL); - - for(i=0; i<NUM_DOMAIN_BITMAPS; i++) { - if((search->bitmap[i] = ndpi_bitmap_alloc()) == NULL) - goto toobad; - } - - return(search); - - toobad: - ndpi_domain_search_free(search); - return(NULL); -} - -/* ********************************************************** */ - -static u_int32_t ndpi_domain_search_size(ndpi_domain_search *search) { - u_int32_t i, total_len = 0; - - for(i=0; i<NUM_DOMAIN_BITMAPS; i++) { - char *buf; - - total_len += ndpi_bitmap_serialize(search->bitmap[i], &buf); - ndpi_free(buf); - } - - return(total_len); -} - -/* ********************************************************** */ - -/* NOTE: domain will be modified: copy it if necessary */ -static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) { - char *elem; - u_int32_t bitmap_id = 0, len, hsum = 0; - bool quit = false; - - if(domain == NULL) return(false); - if((len = strlen(domain)) == 0) return(false); - - len--; - while((len > 0) - && ((domain[len] == '.') - || (domain[len] == '\n') - || (domain[len] == '\r')) - ) - domain[len--] = '\0'; - - if(domain[0] == '.') ++domain; - - elem = strrchr(domain, '.'); - while(elem) { - u_int32_t h; - - if(elem[0] == '.') elem = &elem[1]; - - h = ndpi_hash_string(elem); - - if(elem == domain) { - /* We're adding the beginning of the domain, hence the last token before quitting */ - h += END_OF_TOKENS_DELIMITER; - -#ifdef DEBUG_ADD - if(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) - printf("[add] False positive while adding %s (%s) [%u][bitmap_id: %u]\n", - elem, domain, h + hsum, bitmap_id); -#endif - } - -#ifdef DEBUG_ADD - printf("[add] Trying to add %s [%s][%u][bitmap_id: %u]\n", - elem, domain, h + hsum, bitmap_id); -#endif - - ndpi_bitmap_set(search->bitmap[bitmap_id], h + hsum); - - bitmap_id++, hsum += h; - - if(quit) - break; - - if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD) - elem = domain, quit = true; /* Hash the rest of the word */ - else { - elem[-1] = '\0'; - elem = strrchr(domain, '.'); - - if(elem == NULL) - elem = domain, quit = true; - } - } - - return(bitmap_id); -} - -/* ********************************************************** */ - -static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain) { - char *elem; - u_int32_t bitmap_id = 0, hsum = 0; - bool quit = false; - - if((elem = strrchr(domain, '.')) == NULL) - return(false); /* This does not look like a domain */ - - while(elem) { - u_int32_t h; - - if(elem[0] == '.') elem = &elem[1]; - - h = ndpi_hash_string(elem); - - if(!ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) { - /* Exact match does not work, so let's see if a partial match works instead */ - - /* We're adding the beginning of the domain, hence the last token before quitting */ - h += END_OF_TOKENS_DELIMITER; - - return(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)); - } - - bitmap_id++, hsum += h; - - if(quit) - break; - - if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD) - elem = domain, quit = true; /* Hash the rest of the word */ - else { - elem[-1] = '\0'; - elem = strrchr(domain, '.'); - - if(elem == NULL) - elem = domain, quit = true; - } - } - - return(true); -} - -/* ********************************************************** */ -/* ********************************************************** */ - -ndpi_domain_classify* ndpi_domain_classify_alloc() { - ndpi_domain_classify_t *cat = (ndpi_domain_classify_t*)ndpi_calloc(1, sizeof(ndpi_domain_classifications_t)); + ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_calloc(1, sizeof(ndpi_domain_classify)); return((ndpi_domain_classify*)cat); } /* ********************************************************** */ -void ndpi_domain_classify_free(ndpi_domain_classify *_s) { +void ndpi_domain_classify_free(ndpi_domain_classify *s) { u_int32_t i; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - ndpi_domain_search_free(s->class[i]->domains); - ndpi_free(s->class[i]); + if(s->classes[i].domains != NULL) { + ndpi_bitmap64_free(s->classes[i].domains); } else break; } @@ -424,13 +55,12 @@ void ndpi_domain_classify_free(ndpi_domain_classify *_s) { /* ********************************************************** */ -u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) { - u_int32_t i, tot_len = sizeof(ndpi_domain_classify_t); - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; +u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s) { + u_int32_t i, tot_len = sizeof(ndpi_domain_classify); for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - tot_len += ndpi_domain_search_size(s->class[i]->domains) + sizeof(ndpi_domain_classify_t); + if(s->classes[i].domains != NULL) { + tot_len += ndpi_bitmap64_size(s->classes[i].domains); } else break; } @@ -440,30 +70,22 @@ u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) { /* ********************************************************** */ -bool ndpi_domain_classify_add(ndpi_domain_classify *_s, +bool ndpi_domain_classify_add(ndpi_domain_classify *s, u_int8_t class_id, char *domain) { u_int32_t i; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; - char buf[256], *dot = strrchr(domain, '.'); + char *dot = strrchr(domain, '.'); if(!dot) return(false); if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false); for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - if(s->class[i]->class_id == class_id) { - break; - } - } else { - s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t)); - - if(s->class[i] == NULL) - return(false); - - s->class[i]->class_id = class_id; - s->class[i]->domains = ndpi_domain_search_alloc(); + if(s->classes[i].class_id == class_id) { + break; + } else if(s->classes[i].class_id == 0) { + s->classes[i].class_id = class_id; + s->classes[i].domains = ndpi_bitmap64_alloc(); break; } } @@ -471,39 +93,26 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *_s, if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS) return(false); - snprintf(buf, sizeof(buf), "%s", domain); - -#ifdef DEBUG_ADD - printf("[add] %s @ %u\n", domain, class_id); -#endif - - return(ndpi_domain_search_add(s->class[i]->domains, buf)); + return(ndpi_bitmap64_set(s->classes[i].domains, + ndpi_quick_hash64(domain, strlen(domain)))); } /* ********************************************************** */ -u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s, +u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s, u_int8_t class_id, char *file_path) { u_int32_t i, num_added = 0; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; char buf[256]; FILE *fd; char *line; for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - if(s->class[i]->class_id == class_id) { - break; - } - } else { - s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t)); - - if(s->class[i] == NULL) - return(false); - - s->class[i]->class_id = class_id; - s->class[i]->domains = ndpi_domain_search_alloc(); + if(s->classes[i].class_id == class_id) { + break; + } else if(s->classes[i].class_id == 0) { + s->classes[i].class_id = class_id; + s->classes[i].domains = ndpi_bitmap64_alloc(); break; } } @@ -531,7 +140,8 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s, line[len] = '\0'; } - if(ndpi_domain_search_add(s->class[i]->domains, line)) + if(ndpi_bitmap64_set(s->classes[i].domains, + ndpi_quick_hash64(line, strlen(line)))) num_added++; } @@ -556,12 +166,12 @@ static bool is_valid_domain_char(u_char c) { /* ********************************************************** */ -bool ndpi_domain_classify_contains(ndpi_domain_classify *_s, +bool ndpi_domain_classify_contains(ndpi_domain_classify *s, u_int8_t *class_id /* out */, char *domain) { u_int32_t i, len; - ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s; - char *dot; + u_int64_t hash; + char *dot, *elem; if(!domain) return(false); if((len = strlen(domain)) == 0) return(false); @@ -585,22 +195,32 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s, return(false); } - for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { - if(s->class[i] != NULL) { - char buf[256]; - - snprintf(buf, sizeof(buf), "%s", domain); - - if(ndpi_domain_search_contains(s->class[i]->domains, buf)) { + elem = domain; + + while(elem != NULL) { + hash = ndpi_quick_hash64(elem, strlen(elem)); + + for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) { + if(s->classes[i].class_id != 0) { + if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) { #ifdef DEBUG_CONTAINS - printf("[contains] %s = %d\n", domain, s->class[i]->class_id); + printf("[contains] %s = %d\n", domain, s->classes[i].class_id); #endif - *class_id = s->class[i]->class_id; - return(true); - } + *class_id = s->classes[i].class_id; + return(true); + } + } else + break; } - } + elem = strchr(elem, '.'); + + if((elem == NULL) || (elem == dot)) + break; + else + elem = &elem[1]; + } /* while */ + #ifdef DEBUG_CONTAINS printf("[contains] %s NOT FOUND\n", domain); #endif @@ -608,5 +228,3 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *_s, return(false); } - -#endif |