/*
* ndpi_domain_classify.c
*
* Copyright (C) 2011-23 - ntop.org and contributors
*
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* nDPI is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with nDPI. If not, see .
*
*/
#include
#include
#include
#include
#include "ndpi_config.h"
#include "ndpi_api.h"
#if 0
#define DEBUG_ADD
#define DEBUG_CONTAINS
#endif
ndpi_domain_classify* ndpi_domain_classify_alloc() {
int i;
ndpi_domain_classify *cat = (ndpi_domain_classify*)ndpi_malloc(sizeof(ndpi_domain_classify));
if(!cat)
return NULL;
for(i=0; iclasses[i].class_id = 0, cat->classes[i].domains = NULL;
return((ndpi_domain_classify*)cat);
}
/* ********************************************************** */
void ndpi_domain_classify_free(ndpi_domain_classify *s) {
u_int32_t i;
if(!s)
return;
for(i=0; iclasses[i].domains != NULL) {
ndpi_bitmap64_free(s->classes[i].domains);
} else
break;
}
ndpi_free(s);
}
/* ********************************************************** */
u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s) {
u_int32_t i, tot_len = sizeof(ndpi_domain_classify);
if(!s)
return(0);
for(i=0; iclasses[i].domains != NULL) {
tot_len += ndpi_bitmap64_size(s->classes[i].domains);
} else
break;
}
return(tot_len);
}
/* ********************************************************** */
bool ndpi_domain_classify_add(ndpi_domain_classify *s,
u_int8_t class_id,
const char *domain) {
u_int32_t i;
char *dot;
if(!s || !domain)
return(false);
/* Skip initial string . in domain names */
while(domain[0] == '.') domain++;
dot = strrchr(domain, '.');
if(!dot) return(false);
if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
return(false);
for(i=0; iclasses[i].class_id == class_id) {
break;
} else if(s->classes[i].class_id == 0) {
s->classes[i].class_id = class_id;
s->classes[i].domains = ndpi_bitmap64_alloc();
if(!s->classes[i].domains)
s->classes[i].class_id = 0;
break;
}
}
if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
return(false);
return(ndpi_bitmap64_set(s->classes[i].domains,
ndpi_quick_hash64(domain, strlen(domain))));
}
/* ********************************************************** */
u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
u_int8_t class_id,
char *file_path) {
u_int32_t i, num_added = 0;
char buf[256];
FILE *fd;
char *line;
if(!s || !file_path)
return(false);
for(i=0; iclasses[i].class_id == class_id) {
break;
} else if(s->classes[i].class_id == 0) {
s->classes[i].class_id = class_id;
s->classes[i].domains = ndpi_bitmap64_alloc();
if(!s->classes[i].domains)
s->classes[i].class_id = 0;
break;
}
}
if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
return(false);
/* *************************************** */
fd = fopen(file_path, "r");
if(fd == NULL)
return(false);
while((line = fgets(buf, sizeof(buf), fd)) != NULL) {
u_int len;
u_int64_t hash;
if((line[0] == '#') || (line[0] == '\0'))
continue;
else {
len = strlen(line) - 1;
if(len == 0)
continue;
else
line[len] = '\0';
}
hash = ndpi_quick_hash64(line, strlen(line));
if(ndpi_bitmap64_set(s->classes[i].domains, hash))
num_added++;
}
fclose(fd);
return(num_added);
}
/* ********************************************************** */
bool ndpi_domain_classify_finalize(ndpi_domain_classify *s) {
u_int32_t i;
if(!s)
return(false);
for(i=0; iclasses[i].class_id != 0) {
ndpi_bitmap64_compress(s->classes[i].domains);
}
}
return(true);
}
/* ********************************************************** */
static bool is_valid_domain_char(u_char c) {
if(((c >= 'A')&& (c <= 'Z'))
|| ((c >= 'a')&& (c <= 'z'))
|| ((c >= '0')&& (c <= '9'))
|| (c == '_')
|| (c == '-')
|| (c == '.'))
return(true);
else
return(false);
}
/* ********************************************************** */
bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
u_int8_t *class_id /* out */,
const char *domain) {
u_int32_t i, len;
const char *dot, *elem;
if(!domain || !s) return(false);
if((len = strlen(domain)) == 0) return(false);
if((dot = strrchr(domain, '.')) == NULL) return(false);
if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false);
/* This is a number or a numeric IP or similar */
if(isdigit(domain[len-1]) && isdigit(domain[0])) {
#ifdef DEBUG_CONTAINS
printf("[contains] %s INVALID\n", domain);
#endif
return(false);
}
if(!is_valid_domain_char(domain[0])) {
#ifdef DEBUG_CONTAINS
printf("[contains] %s INVALID\n", domain);
#endif
return(false);
}
elem = domain;
while(elem != NULL) {
u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem));
for(i=0; iclasses[i].class_id != 0) {
if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) {
#ifdef DEBUG_CONTAINS
printf("[contains] %s = %d\n", domain, s->classes[i].class_id);
#endif
*class_id = s->classes[i].class_id;
return(true);
}
} else
break;
}
elem = strchr(elem, '.');
if((elem == NULL) || (elem == dot))
break;
else
elem = &elem[1];
} /* while */
#ifdef DEBUG_CONTAINS
printf("[contains] %s NOT FOUND\n", domain);
#endif
return(false);
}