/*
* ndpi_domain_bitmap.c
*
* Copyright (C) 2011-23 - ntop.org and contributors
*
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* nDPI is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with nDPI. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include
#include
#include
#include
/* NOTE(review): not referenced in the visible part of this file; presumably
   consumed by macros coming from the nDPI headers - confirm */
#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_UNKNOWN
/* Offset added to the hash of the leftmost token of a stored domain, so that
   the "beginning of domain" token is hashed differently from an inner token */
#define END_OF_TOKENS_DELIMITER 0x12345678
/* Number of bitmaps held by each ndpi_domain_search (one per token position) */
#define NUM_DOMAIN_BITMAPS 8
/* Once this many tokens have been consumed, whatever is left of the domain
   is hashed as a single token into the last bitmap */
#define NUM_DOMAIN_BITMAPS_THRESHOLD (NUM_DOMAIN_BITMAPS-1)
#include "ndpi_config.h"
#include "ndpi_api.h"
#include "ndpi_includes.h"
#include "ndpi_encryption.h"
/* Set of bitmaps used to store hashed domain tokens: bitmap[n] receives the
   hash of the n-th token of a domain, counting from the rightmost one */
typedef struct {
ndpi_bitmap *bitmap[NUM_DOMAIN_BITMAPS];
} ndpi_domain_search;
/* One classification: a class identifier and the domains that belong to it */
typedef struct {
u_int16_t class_id; /* Value returned by ndpi_domain_classify_contains() on match */
ndpi_domain_search *domains; /* Hashed domains stored for this class */
} ndpi_domain_classify_t;
/* Fixed-size table of classifications. Slots are filled in order when a new
   class_id is added, so the first NULL slot marks the end of the used ones */
typedef struct {
ndpi_domain_classify_t *class[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
} ndpi_domain_classifications_t;
// #define DEBUG_ADD
// #define DEBUG_CONTAINS
/* ********************************************************** */
/*
 * Release a domain search and all the bitmaps it owns.
 *
 * search: the object to free; NULL is accepted and ignored (this happens
 *         when the allocation of a class' search failed).
 *
 * Bitmaps are allocated in order, so the first NULL entry means that no
 * further bitmap was allocated (partially built object).
 */
static void ndpi_domain_search_free(ndpi_domain_search *search) {
  u_int16_t i;

  if(search == NULL)
    return;

  for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
    if(search->bitmap[i] == NULL)
      break;

    ndpi_bitmap_free(search->bitmap[i]);
  }

  ndpi_free(search);
}
/* ********************************************************** */
static ndpi_domain_search* ndpi_domain_search_alloc() {
ndpi_domain_search *search = (ndpi_domain_search*)ndpi_calloc(NUM_DOMAIN_BITMAPS, sizeof(ndpi_domain_search));
u_int16_t i;
if(!search) return(NULL);
for(i=0; ibitmap[i] = ndpi_bitmap_alloc()) == NULL)
goto toobad;
}
return(search);
toobad:
ndpi_domain_search_free(search);
return(NULL);
}
/* ********************************************************** */
/*
 * Compute the size of a domain search as the sum of the serialized sizes of
 * its bitmaps.
 *
 * search: the object to measure; NULL yields 0.
 *
 * Each bitmap is serialized into a temporary buffer that is released right
 * after its length has been accounted for.
 */
static u_int32_t ndpi_domain_search_size(ndpi_domain_search *search) {
  u_int32_t i, total_len = 0;

  if(search == NULL)
    return(0);

  for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
    char *buf = NULL;

    total_len += ndpi_bitmap_serialize(search->bitmap[i], &buf);

    if(buf != NULL)
      ndpi_free(buf);
  }

  return(total_len);
}
/* ********************************************************** */
/*
 * Store a domain into a domain search.
 *
 * NOTE: domain will be modified: copy it if necessary
 *
 * search: destination object; NULL (e.g. a failed allocation) is rejected.
 * domain: NUL-terminated, writable domain name.
 *
 * The domain is split on '.' and processed from the rightmost token to the
 * leftmost one: the hash of the n-th token is set in bitmap[n]. The hash of
 * the final (leftmost) token is shifted by END_OF_TOKENS_DELIMITER. When
 * NUM_DOMAIN_BITMAPS_THRESHOLD tokens have been stored, what is left of the
 * domain is hashed as one single token.
 *
 * Returns true if at least one token was stored, false otherwise (NULL or
 * empty input, or a name that contains no dot after trimming).
 */
static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) {
  char *elem;
  u_int32_t bitmap_id = 0, len;
  bool quit = false;

  if((search == NULL) || (domain == NULL)) return(false);
  if((len = strlen(domain)) == 0) return(false);

  /* Drop trailing dots and line terminators */
  len--;
  while((len > 0)
	&& ((domain[len] == '.')
	    || (domain[len] == '\n')
	    || (domain[len] == '\r'))
	)
    domain[len--] = '\0';

  /* Skip a leading dot */
  if(domain[0] == '.') ++domain;

  elem = strrchr(domain, '.');

  while(elem) {
    u_int32_t h;

    if(elem[0] == '.') elem = &elem[1];

    h = ndpi_hash_string(elem);

    if(elem == domain) {
      /* We're adding the beginning of the domain, hence the last token before quitting */
      h += END_OF_TOKENS_DELIMITER;
    }

    ndpi_bitmap_set(search->bitmap[bitmap_id], h);

    bitmap_id++;

    if(quit)
      break;

    if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
      elem = domain, quit = true; /* Hash the rest of the word */
    else {
      /* Cut the token just stored (elem points one past a dot here) */
      elem[-1] = '\0';
      elem = strrchr(domain, '.');

      if(elem == NULL)
	elem = domain, quit = true;
    }
  }

  return(bitmap_id > 0);
}
/* ********************************************************** */
/*
 * Check whether a domain matches one previously stored in a domain search.
 *
 * NOTE: domain will be modified: copy it if necessary
 *
 * search: object to query; NULL (e.g. a failed allocation) never matches.
 * domain: NUL-terminated, writable domain name; NULL never matches.
 *
 * Tokens are walked from the rightmost to the leftmost one, the n-th token
 * being looked up in bitmap[n]. As long as the plain hash is found the walk
 * continues; at the first miss the result is whether the hash shifted by
 * END_OF_TOKENS_DELIMITER (i.e. the beginning of a stored domain) is present
 * at that position. If every token is found, the result is true.
 */
static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain) {
  char *elem;
  u_int32_t bitmap_id = 0;
  bool quit = false;

  if((search == NULL) || (domain == NULL))
    return(false);

  if((elem = strrchr(domain, '.')) == NULL)
    return(false); /* This does not look like a domain */

  while(elem) {
    u_int32_t h;

    if(elem[0] == '.') elem = &elem[1];

    h = ndpi_hash_string(elem);

    if(!ndpi_bitmap_isset(search->bitmap[bitmap_id], h)) {
      /* Exact match does not work, so let's see if a partial match works instead */

      /* Check whether this token was stored as the beginning of a domain */
      h += END_OF_TOKENS_DELIMITER;

      return(ndpi_bitmap_isset(search->bitmap[bitmap_id], h));
    }

    bitmap_id++;

    if(quit)
      break;

    if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
      elem = domain, quit = true; /* Hash the rest of the word */
    else {
      /* Cut the token just checked (elem points one past a dot here) */
      elem[-1] = '\0';
      elem = strrchr(domain, '.');

      if(elem == NULL)
	elem = domain, quit = true;
    }
  }

  return(true);
}
/* ********************************************************** */
/* ********************************************************** */
ndpi_domain_classify* ndpi_domain_classify_alloc() {
ndpi_domain_classify_t *cat = (ndpi_domain_classify_t*)ndpi_calloc(1, sizeof(ndpi_domain_classifications_t));
return((ndpi_domain_classify*)cat);
}
/* ********************************************************** */
/*
 * Release a classification table and every class stored in it.
 *
 * _s: handle returned by ndpi_domain_classify_alloc(); NULL is ignored.
 */
void ndpi_domain_classify_free(ndpi_domain_classify *_s) {
  u_int32_t i;
  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;

  if(s == NULL)
    return;

  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
    if(s->class[i] != NULL) {
      /* domains may be NULL if its allocation failed when the class was created */
      if(s->class[i]->domains != NULL)
        ndpi_domain_search_free(s->class[i]->domains);

      ndpi_free(s->class[i]);
    } else
      break; /* Slots are filled in order: nothing is stored past the first NULL */
  }

  ndpi_free(s);
}
/* ********************************************************** */
/*
 * Compute the size of a classification table: the table itself, plus, for
 * every class in use, the class entry and the size of its domain search.
 *
 * _s: handle returned by ndpi_domain_classify_alloc(); NULL yields 0.
 */
u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) {
  /* Start from the size of the container (not of a single class entry) */
  u_int32_t i, tot_len = sizeof(ndpi_domain_classifications_t);
  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;

  if(s == NULL)
    return(0);

  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
    if(s->class[i] != NULL) {
      tot_len += sizeof(ndpi_domain_classify_t);

      /* domains may be NULL if its allocation failed when the class was created */
      if(s->class[i]->domains != NULL)
        tot_len += ndpi_domain_search_size(s->class[i]->domains);
    } else
      break; /* Slots are filled in order: nothing is stored past the first NULL */
  }

  return(tot_len);
}
/* ********************************************************** */
/*
 * Add a domain to the class identified by class_id, creating the class in
 * the first free slot if it does not exist yet.
 *
 * _s:       handle returned by ndpi_domain_classify_alloc().
 * class_id: identifier of the class the domain belongs to.
 * domain:   NUL-terminated domain name; it is not modified (a local copy,
 *           limited to 255 characters, is what gets stored).
 *
 * Returns true if the domain was stored. Returns false for NULL arguments,
 * names without a dot, names ending in ".arpa" or ".local", when no slot is
 * left for a new class, or when memory cannot be allocated.
 */
bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
			      u_int16_t class_id,
			      char *domain) {
  u_int32_t i;
  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
  char buf[256], *dot;

  if((s == NULL) || (domain == NULL))
    return(false);

  dot = strrchr(domain, '.');

  if(!dot) return(false);
  if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
    return(false);

  /* Look for the class, or create it in the first unused slot */
  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
    if(s->class[i] != NULL) {
      if(s->class[i]->class_id == class_id) {
	break;
      }
    } else {
      s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));

      if(s->class[i] == NULL)
	return(false);

      s->class[i]->class_id = class_id;
      s->class[i]->domains = ndpi_domain_search_alloc();

      if(s->class[i]->domains == NULL) {
	/* Do not leave a class without storage in the table */
	ndpi_free(s->class[i]);
	s->class[i] = NULL;
	return(false);
      }

      break;
    }
  }

  if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
    return(false);

  if(s->class[i]->domains == NULL)
    return(false);

  /* Work on a copy: the search routine modifies the string it receives */
  snprintf(buf, sizeof(buf), "%s", domain);

#ifdef DEBUG_ADD
  printf("[add] %s @ %u\n", domain, class_id);
#endif

  return(ndpi_domain_search_add(s->class[i]->domains, buf));
}
/* ********************************************************** */
/*
 * Load domains from a text file (one per line) into the class identified by
 * class_id, creating the class in the first free slot if needed.
 *
 * _s:        handle returned by ndpi_domain_classify_alloc().
 * class_id:  identifier of the class the domains belong to.
 * file_path: path of the file to read. Lines starting with '#' and empty
 *            lines are skipped. Lines are read in chunks of at most 255
 *            characters.
 *
 * Returns the number of domains stored; 0 is also returned for NULL
 * arguments, when no slot is left for a new class, when memory cannot be
 * allocated, or when the file cannot be opened.
 */
u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
					   u_int16_t class_id,
					   char *file_path) {
  u_int32_t i, num_added = 0;
  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
  char buf[256];
  FILE *fd;
  char *line;

  if((s == NULL) || (file_path == NULL))
    return(0);

  /* Look for the class, or create it in the first unused slot */
  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
    if(s->class[i] != NULL) {
      if(s->class[i]->class_id == class_id) {
	break;
      }
    } else {
      s->class[i] = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));

      if(s->class[i] == NULL)
	return(0);

      s->class[i]->class_id = class_id;
      s->class[i]->domains = ndpi_domain_search_alloc();

      if(s->class[i]->domains == NULL) {
	/* Do not leave a class without storage in the table */
	ndpi_free(s->class[i]);
	s->class[i] = NULL;
	return(0);
      }

      break;
    }
  }

  if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
    return(0);

  if(s->class[i]->domains == NULL)
    return(0);

  /* *************************************** */

  fd = fopen(file_path, "r");
  if(fd == NULL)
    return(0);

  while((line = fgets(buf, sizeof(buf), fd)) != NULL) {
    size_t len;

    if((line[0] == '#') || (line[0] == '\0'))
      continue;

    /* fgets() keeps the line terminator when there is one: remove it, but
       do not touch the last character of a final line lacking a newline */
    len = strlen(line);
    while((len > 0) && ((line[len-1] == '\n') || (line[len-1] == '\r')))
      line[--len] = '\0';

    if(len == 0)
      continue;

    if(ndpi_domain_search_add(s->class[i]->domains, line))
      num_added++;
  }

  fclose(fd);

  return(num_added);
}
/* ********************************************************** */
/*
 * Tell whether c is a character accepted in a domain name by this module:
 * an ASCII letter, an ASCII digit, '_', '-' or '.'.
 */
static bool is_valid_domain_char(u_char c) {
  /* Letters and digits */
  if((c >= 'a') && (c <= 'z')) return(true);
  if((c >= 'A') && (c <= 'Z')) return(true);
  if((c >= '0') && (c <= '9')) return(true);

  /* Allowed punctuation */
  switch(c) {
  case '_':
  case '-':
  case '.':
    return(true);

  default:
    return(false);
  }
}
/* ********************************************************** */
/*
 * Look up a domain in every class of a classification table.
 *
 * _s:     handle returned by ndpi_domain_classify_alloc().
 * domain: NUL-terminated domain name; it is not modified.
 *
 * Returns the class_id of the first class that matches, or 0 when nothing
 * matches. 0 is also returned, without any lookup, for NULL arguments, an
 * empty name, a name without a dot, a name ending in ".arpa" or ".local", a
 * name that both starts and ends with a digit (number or numeric IP), and a
 * name whose first character is not a valid domain character.
 */
u_int16_t ndpi_domain_classify_contains(ndpi_domain_classify *_s,
					char *domain) {
  u_int32_t i, len;
  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
  char *dot;

  if(!s) return(0);
  if(!domain) return(0);
  if((len = strlen(domain)) == 0) return(0);
  if((dot = strrchr(domain, '.')) == NULL) return(0);
  if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(0);

  /* This is a number or a numeric IP or similar */
  /* The casts keep isdigit() defined for bytes with the high bit set */
  if(isdigit((unsigned char)domain[len-1]) && isdigit((unsigned char)domain[0])) {
#ifdef DEBUG_CONTAINS
    printf("[contains] %s INVALID\n", domain);
#endif

    return(0);
  }

  if(!is_valid_domain_char(domain[0])) {
#ifdef DEBUG_CONTAINS
    printf("[contains] %s INVALID\n", domain);
#endif

    return(0);
  }

  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
    /* Skip unused slots and classes whose storage could not be allocated */
    if((s->class[i] != NULL) && (s->class[i]->domains != NULL)) {
      char buf[256];

      /* Fresh copy for every class: the search routine modifies its input */
      snprintf(buf, sizeof(buf), "%s", domain);

      if(ndpi_domain_search_contains(s->class[i]->domains, buf)) {
#ifdef DEBUG_CONTAINS
	printf("[contains] %s = %d\n", domain, s->class[i]->class_id);
#endif
	return(s->class[i]->class_id);
      }
    }
  }

#ifdef DEBUG_CONTAINS
  printf("[contains] %s NOT FOUND\n", domain);
#endif

  return(0);
}