aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Luca Deri <deri@ntop.org> 2023-09-02 19:27:19 +0200
committer Luca Deri <deri@ntop.org> 2023-09-02 19:27:19 +0200
commit f0dc3347ec92a55c16b7033e1b7f2890892b3094 (patch)
tree cbecb7c62a364e9dd1501d2837ce46a2d28bc7f8 /src
parent 1d480c18e381f393bd25352c6140e9651f3e3a76 (diff)
Merged new and old version of ndpi_domain_classify.c code
Diffstat (limited to 'src')
-rw-r--r-- src/lib/ndpi_domain_classify.c 394
-rw-r--r-- src/lib/ndpi_main.c 3
2 files changed, 397 insertions, 0 deletions
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index 986ebbf07..f5e6752c1 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -30,7 +30,14 @@
// #define DEBUG_ADD
// #define DEBUG_CONTAINS
+// #define USE_BINARY_BITMAP
+
+#ifdef USE_BINARY_BITMAP
+
/* ********************************************************** */
+/* ********************************************************** */
+
+/* Faster but it uses more memory */
void ndpi_domain_classify_free(ndpi_domain_classify *search) {
ndpi_binary_bitmap_free(search->bitmap);
@@ -198,3 +205,390 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *c,
return(false);
}
+#else /* ! USE_BINARY_BITMAP */
+
+/* ********************************************************** */
+/* ********************************************************** */
+
+#define END_OF_TOKENS_DELIMITER 0x12345678 /* added to the hash of a domain's leftmost token to mark where a stored name begins */
+#define NUM_DOMAIN_BITMAPS 8 /* bitmaps per ndpi_domain_search, indexed by token position counted from the right */
+#define NUM_DOMAIN_BITMAPS_THRESHOLD (NUM_DOMAIN_BITMAPS-1) /* once this many tokens are consumed, the remaining string is hashed as one last token */
+#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 8 /* maximum number of distinct class ids held by one classifier */
+
+typedef struct { /* Set of hashed domain tokens: one bitmap per token position */
+ ndpi_bitmap *bitmap[NUM_DOMAIN_BITMAPS];
+} ndpi_domain_search;
+
+typedef struct { /* Domains registered under a single class id */
+ u_int16_t class_id;
+ ndpi_domain_search *domains;
+} ndpi_domain_classify_t;
+
+typedef struct { /* Whole classifier: slots are filled from index 0 and the first NULL slot ends the used range */
+ ndpi_domain_classify_t *class[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
+} ndpi_domain_classifications_t;
+
+/* ********************************************************** */
+
+/*
+ * Releases every bitmap owned by `search` and then the structure itself.
+ *
+ * A NULL `search` is accepted and ignored: a class entry may carry a NULL
+ * `domains` pointer when its allocation failed, and this function is also
+ * the cleanup path of a partially built structure.
+ *
+ * Bitmap slots are filled in index order by ndpi_domain_search_alloc(),
+ * hence the first NULL slot marks the end of the allocated ones.
+ */
+static void ndpi_domain_search_free(ndpi_domain_search *search) {
+  u_int16_t i;
+
+  if(search == NULL)
+    return;
+
+  for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
+    if(search->bitmap[i] == NULL)
+      break;
+
+    ndpi_bitmap_free(search->bitmap[i]);
+    search->bitmap[i] = NULL;
+  }
+
+  ndpi_free(search);
+}
+
+/* ********************************************************** */
+
+/*
+ * Allocates a domain search structure with all NUM_DOMAIN_BITMAPS bitmaps
+ * created and ready for use.
+ *
+ * Returns the new structure, or NULL if any allocation fails (whatever was
+ * allocated up to that point is released before returning).
+ */
+static ndpi_domain_search* ndpi_domain_search_alloc() {
+  /*
+   * ndpi_domain_search already embeds the whole array of bitmap pointers, so
+   * a single zeroed instance is what is needed here (requesting
+   * NUM_DOMAIN_BITMAPS instances would waste memory).
+   */
+  ndpi_domain_search *search = (ndpi_domain_search*)ndpi_calloc(1, sizeof(ndpi_domain_search));
+  u_int16_t i;
+
+  if(!search) return(NULL);
+
+  for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
+    if((search->bitmap[i] = ndpi_bitmap_alloc()) == NULL)
+      goto toobad;
+  }
+
+  return(search);
+
+ toobad:
+  /* Slots not reached are still NULL (assumes ndpi_calloc() zero-fills), which is where the free routine stops */
+  ndpi_domain_search_free(search);
+  return(NULL);
+}
+
+/* ********************************************************** */
+
+/*
+ * Returns the sum of the serialized sizes of all bitmaps held by `search`.
+ *
+ * Each bitmap is serialized into a temporary buffer that is released right
+ * away; only the length reported by ndpi_bitmap_serialize() is kept.
+ * A NULL `search` (e.g. a class whose domains could not be allocated) and
+ * NULL bitmap slots contribute 0.
+ */
+static u_int32_t ndpi_domain_search_size(ndpi_domain_search *search) {
+  u_int32_t i, total_len = 0;
+
+  if(search == NULL)
+    return(0);
+
+  for(i=0; i<NUM_DOMAIN_BITMAPS; i++) {
+    char *buf = NULL; /* stays NULL if the serializer does not hand a buffer back */
+
+    if(search->bitmap[i] == NULL)
+      continue;
+
+    total_len += ndpi_bitmap_serialize(search->bitmap[i], &buf);
+
+    if(buf != NULL)
+      ndpi_free(buf);
+  }
+
+  return(total_len);
+}
+
+/* ********************************************************** */
+
+/* Stores `domain` in `search`, hashing it one token at a time from the rightmost label to the leftmost.
+ * Returns true when at least one token was stored; false for NULL, empty or (after trimming) dot-less input.
+ * `search` is dereferenced without a check and must not be NULL.
+ * NOTE: domain will be modified: copy it if necessary */
+static bool ndpi_domain_search_add(ndpi_domain_search *search, char *domain) {
+ char *elem;
+ u_int32_t bitmap_id = 0, len, hsum = 0;
+ bool quit = false;
+
+ if(domain == NULL) return(false);
+ if((len = strlen(domain)) == 0) return(false);
+
+ len--; /* index of the last character */
+ while((len > 0)
+ && ((domain[len] == '.')
+ || (domain[len] == '\n')
+ || (domain[len] == '\r'))
+ )
+ domain[len--] = '\0'; /* strip trailing dots and line terminators */
+
+ if(domain[0] == '.') ++domain; /* skip one leading dot */
+
+ elem = strrchr(domain, '.'); /* rightmost dot; NULL (no dot) skips the loop, so 0 tokens are stored */
+ while(elem) {
+ u_int32_t h;
+
+ if(elem[0] == '.') elem = &elem[1]; /* step over the dot onto the token itself */
+
+ h = ndpi_hash_string(elem);
+
+ if(elem == domain) {
+ /* We're adding the beginning of the domain, hence the last token before quitting */
+ h += END_OF_TOKENS_DELIMITER;
+ }
+
+ ndpi_bitmap_set(search->bitmap[bitmap_id], h + hsum); /* the stored key also depends on the tokens already processed, through hsum */
+
+ bitmap_id++, hsum += h;
+
+ if(quit)
+ break;
+
+ if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
+ elem = domain, quit = true; /* Hash the rest of the word */
+ else {
+ elem[-1] = '\0'; /* cut the token just hashed (and its dot) off the string */
+ elem = strrchr(domain, '.');
+
+ if(elem == NULL)
+ elem = domain, quit = true; /* no dot left: what remains is the leftmost token */
+ }
+ }
+
+ return(bitmap_id); /* converted to bool: true if any token was stored */
+}
+
+/* ********************************************************** */
+/*
+ * Looks `domain` up in `search`, walking its tokens from right to left with the
+ * same hashing scheme ndpi_domain_search_add() uses when inserting.
+ * Returns false for names without a dot.
+ * `search` and `domain` are dereferenced without a check and must not be NULL.
+ * NOTE: domain is modified (dots are overwritten with '\0'): pass a copy
+ */
+static bool ndpi_domain_search_contains(ndpi_domain_search *search, char *domain) {
+ char *elem;
+ u_int32_t bitmap_id = 0, hsum = 0;
+ bool quit = false;
+
+ if((elem = strrchr(domain, '.')) == NULL)
+ return(false); /* This does not look like a domain */
+
+ while(elem) {
+ u_int32_t h;
+
+ if(elem[0] == '.') elem = &elem[1]; /* step over the dot onto the token itself */
+
+ h = ndpi_hash_string(elem);
+
+ if(!ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum)) {
+ /* Exact match does not work, so let's see if a partial match works instead */
+
+ /* Retry with the end-of-tokens marker: insertion adds it only to the leftmost token of a stored name, so a hit means a stored name begins at this token */
+ h += END_OF_TOKENS_DELIMITER;
+
+ return(ndpi_bitmap_isset(search->bitmap[bitmap_id], h + hsum));
+ }
+
+ bitmap_id++, hsum += h;
+
+ if(quit)
+ break;
+
+ if(bitmap_id == NUM_DOMAIN_BITMAPS_THRESHOLD)
+ elem = domain, quit = true; /* Hash the rest of the word */
+ else {
+ elem[-1] = '\0'; /* cut the token just checked (and its dot) off the string */
+ elem = strrchr(domain, '.');
+
+ if(elem == NULL)
+ elem = domain, quit = true; /* no dot left: what remains is the leftmost token */
+ }
+ }
+
+ return(true); /* every token of the query was found without needing the marker */
+}
+
+/* ********************************************************** */
+/* ********************************************************** */
+
+/*
+ * Creates an empty domain classifier and returns it as an opaque handle.
+ *
+ * Returns NULL if the allocation fails. Class entries are created on demand
+ * when domains are added; release the handle with ndpi_domain_classify_free().
+ */
+ndpi_domain_classify* ndpi_domain_classify_alloc() {
+  /*
+   * The object behind the handle is the classifications container (not a
+   * single class entry): type the pointer accordingly.
+   * Assumes ndpi_calloc() zero-fills, so that every class slot starts as NULL.
+   */
+  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)ndpi_calloc(1, sizeof(ndpi_domain_classifications_t));
+
+  return((ndpi_domain_classify*)s);
+}
+
+/* ********************************************************** */
+
+/*
+ * Releases a classifier together with all of its class entries.
+ *
+ * A NULL handle is accepted and ignored. Class slots are filled from index 0
+ * onwards, so the walk stops at the first NULL slot.
+ */
+void ndpi_domain_classify_free(ndpi_domain_classify *_s) {
+  u_int32_t i;
+  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+
+  if(s == NULL)
+    return;
+
+  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
+    if(s->class[i] == NULL)
+      break;
+
+    /* domains may be NULL if its allocation failed when the class was registered */
+    if(s->class[i]->domains != NULL)
+      ndpi_domain_search_free(s->class[i]->domains);
+
+    ndpi_free(s->class[i]);
+    s->class[i] = NULL;
+  }
+
+  ndpi_free(s);
+}
+
+/* ********************************************************** */
+
+/*
+ * Returns an estimate, in bytes, of the memory used by the classifier:
+ * the container itself plus, for each registered class, its entry and the
+ * serialized size of its bitmaps.
+ *
+ * Returns 0 for a NULL handle.
+ */
+u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *_s) {
+  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+  /* The base cost is the container that ndpi_domain_classify_alloc() allocates, not a single class entry */
+  u_int32_t i, tot_len = sizeof(ndpi_domain_classifications_t);
+
+  if(s == NULL)
+    return(0);
+
+  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
+    if(s->class[i] == NULL)
+      break;
+
+    tot_len += sizeof(ndpi_domain_classify_t);
+
+    /* domains may be NULL if its allocation failed when the class was registered */
+    if(s->class[i]->domains != NULL)
+      tot_len += ndpi_domain_search_size(s->class[i]->domains);
+  }
+
+  return(tot_len);
+}
+
+/* ********************************************************** */
+
+/*
+ * Registers `domain` under `class_id`.
+ *
+ * The class entry is created on first use, up to
+ * MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS distinct classes. `domain` itself is
+ * left untouched: the work is done on a local copy (at most 255 characters).
+ *
+ * Returns false when the handle or the domain is NULL, the name has no dot,
+ * ends in ".arpa" or ".local", no class slot is available, memory cannot be
+ * allocated, or nothing was stored; true otherwise.
+ */
+bool ndpi_domain_classify_add(ndpi_domain_classify *_s,
+                              u_int8_t class_id,
+                              char *domain) {
+  u_int32_t i;
+  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+  char buf[256], *dot;
+
+  /* Validate before strrchr() touches the string */
+  if((s == NULL) || (domain == NULL)) return(false);
+
+  dot = strrchr(domain, '.');
+
+  if(!dot) return(false);
+  if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
+    return(false);
+
+  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
+    if(s->class[i] != NULL) {
+      if(s->class[i]->class_id == class_id) {
+        break;
+      }
+    } else {
+      /* First free slot: build the entry completely before publishing it */
+      ndpi_domain_classify_t *entry = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));
+
+      if(entry == NULL)
+        return(false);
+
+      entry->class_id = class_id;
+      entry->domains = ndpi_domain_search_alloc();
+
+      if(entry->domains == NULL) {
+        /* Do not leave behind a class without its bitmaps */
+        ndpi_free(entry);
+        return(false);
+      }
+
+      s->class[i] = entry;
+      break;
+    }
+  }
+
+  if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
+    return(false);
+
+  if(s->class[i]->domains == NULL)
+    return(false);
+
+  /* ndpi_domain_search_add() modifies its argument: hand it a copy */
+  snprintf(buf, sizeof(buf), "%s", domain);
+
+#ifdef DEBUG_ADD
+  printf("[add] %s @ %u\n", domain, class_id);
+#endif
+
+  return(ndpi_domain_search_add(s->class[i]->domains, buf));
+}
+
+/* ********************************************************** */
+
+/*
+ * Loads domains from the text file `file_path` (one per line) and registers
+ * them under `class_id`.
+ *
+ * Lines starting with '#' and empty lines are skipped. Only trailing line
+ * terminators are removed from a line, so a last line that has no final
+ * newline is stored in full.
+ *
+ * Returns the number of domains stored; 0 when the handle or the path is
+ * NULL, no class slot is available, memory cannot be allocated or the file
+ * cannot be opened.
+ */
+u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *_s,
+                                           u_int8_t class_id,
+                                           char *file_path) {
+  u_int32_t i, num_added = 0;
+  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+  char buf[256];
+  FILE *fd;
+  char *line;
+
+  if((s == NULL) || (file_path == NULL))
+    return(0);
+
+  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
+    if(s->class[i] != NULL) {
+      if(s->class[i]->class_id == class_id) {
+        break;
+      }
+    } else {
+      /* First free slot: build the entry completely before publishing it */
+      ndpi_domain_classify_t *entry = (ndpi_domain_classify_t*)ndpi_malloc(sizeof(ndpi_domain_classify_t));
+
+      if(entry == NULL)
+        return(0);
+
+      entry->class_id = class_id;
+      entry->domains = ndpi_domain_search_alloc();
+
+      if(entry->domains == NULL) {
+        /* Do not leave behind a class without its bitmaps */
+        ndpi_free(entry);
+        return(0);
+      }
+
+      s->class[i] = entry;
+      break;
+    }
+  }
+
+  if(i == MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS)
+    return(0);
+
+  if(s->class[i]->domains == NULL)
+    return(0);
+
+  /* *************************************** */
+
+  fd = fopen(file_path, "r");
+  if(fd == NULL)
+    return(0);
+
+  while((line = fgets(buf, sizeof(buf), fd)) != NULL) {
+    size_t len;
+
+    if(line[0] == '#')
+      continue;
+
+    /* fgets() keeps the newline when there is one: drop terminators only, never a payload character */
+    len = strlen(line);
+    while((len > 0) && ((line[len-1] == '\n') || (line[len-1] == '\r')))
+      line[--len] = '\0';
+
+    if(len == 0)
+      continue;
+
+    /* buf is rewritten at every iteration, so it can be modified in place by the callee */
+    if(ndpi_domain_search_add(s->class[i]->domains, line))
+      num_added++;
+  }
+
+  fclose(fd);
+
+  return(num_added);
+}
+
+/* ********************************************************** */
+
+/* Tells whether `c` is one of the characters accepted in a domain name: ASCII letters, digits, '_', '-' and '.' */
+static bool is_valid_domain_char(u_char c) {
+  switch(c) {
+  case '_':
+  case '-':
+  case '.':
+    return(true);
+
+  default:
+    break;
+  }
+
+  if((c >= '0') && (c <= '9')) return(true);
+  if((c >= 'a') && (c <= 'z')) return(true);
+
+  return((c >= 'A') && (c <= 'Z'));
+}
+
+/* ********************************************************** */
+
+/*
+ * Checks whether `domain` belongs to one of the registered classes.
+ *
+ * Names that are NULL or empty, have no dot, end in ".arpa" or ".local",
+ * both start and end with a digit (numbers, numeric IPs or similar), or
+ * start with a character that is not valid in a domain name are rejected
+ * without any lookup.
+ *
+ * On a match the id of the first matching class is written to `*class_id`
+ * (when `class_id` is not NULL) and true is returned; otherwise false is
+ * returned and `*class_id` is left untouched. `domain` is never modified.
+ */
+bool ndpi_domain_classify_contains(ndpi_domain_classify *_s,
+                                   u_int8_t *class_id /* out */,
+                                   char *domain) {
+  u_int32_t i, len;
+  ndpi_domain_classifications_t *s = (ndpi_domain_classifications_t*)_s;
+  char *dot;
+
+  if((s == NULL) || (!domain)) return(false);
+  if((len = strlen(domain)) == 0) return(false);
+  if((dot = strrchr(domain, '.')) == NULL) return(false);
+  if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false);
+
+  /* This is a number or a numeric IP or similar */
+  /* (the <ctype.h> functions need a value representable as unsigned char, plain char may be negative) */
+  if(isdigit((unsigned char)domain[len-1]) && isdigit((unsigned char)domain[0])) {
+#ifdef DEBUG_CONTAINS
+    printf("[contains] %s INVALID\n", domain);
+#endif
+
+    return(false);
+  }
+
+  if(!is_valid_domain_char(domain[0])) {
+#ifdef DEBUG_CONTAINS
+    printf("[contains] %s INVALID\n", domain);
+#endif
+
+    return(false);
+  }
+
+  for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
+    /* domains may be NULL if its allocation failed when the class was registered */
+    if((s->class[i] != NULL) && (s->class[i]->domains != NULL)) {
+      char buf[256];
+
+      /* The lookup cuts the string into tokens: give every class a fresh copy */
+      snprintf(buf, sizeof(buf), "%s", domain);
+
+      if(ndpi_domain_search_contains(s->class[i]->domains, buf)) {
+#ifdef DEBUG_CONTAINS
+        printf("[contains] %s = %d\n", domain, s->class[i]->class_id);
+#endif
+        if(class_id != NULL)
+          *class_id = s->class[i]->class_id;
+
+        return(true);
+      }
+    }
+  }
+
+#ifdef DEBUG_CONTAINS
+  printf("[contains] %s NOT FOUND\n", domain);
+#endif
+
+  return(false);
+}
+
+
+#endif
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index c5a085be6..d5e2cecef 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -4206,6 +4206,9 @@ int ndpi_load_category_file(struct ndpi_detection_module_struct *ndpi_str,
while((line[len] == '\n') || (line[len] == '\r'))
line[len--] = '\0';
+ while((line[0] == '-') || (line[0] == '.'))
+ line++;
+
if(ndpi_load_category(ndpi_str, line, category_id, NULL) > 0)
num_loaded++;
}