aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/include/ndpi_api.h35
-rw-r--r--src/include/ndpi_private.h12
-rw-r--r--src/lib/ndpi_domain_classify.c74
-rw-r--r--src/lib/ndpi_domains.c93
-rw-r--r--src/lib/ndpi_main.c9
5 files changed, 185 insertions, 38 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index b20305e33..f4c2f6114 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -2147,6 +2147,9 @@ extern "C" {
u_int8_t class_id,
char *file_path);
bool ndpi_domain_classify_finalize(ndpi_domain_classify *s);
+ const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s,
+ u_int8_t *class_id /* out */,
+ const char *hostnname);
bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
u_int8_t *class_id /* out */,
const char *domain);
@@ -2188,6 +2191,8 @@ extern "C" {
/**
* Get user data which was previously set with `ndpi_set_user_data()`.
*
+ * @par ndpi_str = the struct created for the protocol detection
+ *
* @return the user data pointer
*
*/
@@ -2195,6 +2200,36 @@ extern "C" {
/* ******************************* */
+ /**
+ * Loads the domain suffixes from the specified path. You need to
+ * perform this action once
+ *
+ * @par ndpi_str = the struct created for the protocol detection
+ * @par public_suffix_list_path = path of the public suffix list file
+ *
+ * @return 0 = no error, a negative value (-1, -2 or -3) otherwise
+ *
+ */
+ int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str,
+ char *public_suffix_list_path);
+
+ /**
+ * Returns the domain suffix out of the specified hostname.
+ * The returned pointer points inside the original hostname (no copy is made).
+ * Note that you need to call ndpi_load_domain_suffixes() before
+ * calling this function.
+ *
+ * @par ndpi_str = the struct created for the protocol detection
+ * @par hostname = the hostname from which the domain name has to be extracted
+ *
+ * @return The host domain name or the host itself if not found.
+ *
+ */
+ const char* ndpi_get_host_domain_suffix(struct ndpi_detection_module_struct *ndpi_str,
+ const char *hostname);
+
+ /* ******************************* */
+
/* Can't call libc functions from kernel space, define some stub instead */
#define ndpi_isalpha(ch) (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z'))
diff --git a/src/include/ndpi_private.h b/src/include/ndpi_private.h
index d0adfa362..df3bfaf2c 100644
--- a/src/include/ndpi_private.h
+++ b/src/include/ndpi_private.h
@@ -152,7 +152,7 @@ struct ndpi_detection_module_struct {
u_int16_t num_tls_blocks_to_follow;
u_int8_t skip_tls_blocks_until_change_cipher:1, _notused:7;
u_int8_t tls_certificate_expire_in_x_days;
-
+
void *user_data;
char custom_category_labels[NUM_CUSTOM_CATEGORIES][CUSTOM_CATEGORY_LABEL_LEN];
@@ -206,11 +206,11 @@ struct ndpi_detection_module_struct {
/* Patricia trees */
ndpi_patricia_tree_t *ip_risk_mask_ptree;
ndpi_patricia_tree_t *ip_risk_mask_ptree6;
- ndpi_patricia_tree_t *ip_risk_ptree;
+ ndpi_patricia_tree_t *ip_risk_ptree;
ndpi_patricia_tree_t *ip_risk_ptree6;
ndpi_patricia_tree_t *protocols_ptree; /* IP-based protocol detection */
ndpi_patricia_tree_t *protocols_ptree6;
-
+
/* *** If you add a new Patricia tree, please update ptree_type above! *** */
struct {
@@ -256,7 +256,7 @@ struct ndpi_detection_module_struct {
struct ndpi_lru_cache *tls_cert_cache;
u_int32_t tls_cert_cache_num_entries;
int32_t tls_cert_cache_ttl;
-
+
/* NDPI_PROTOCOL_MINING and subprotocols */
struct ndpi_lru_cache *mining_cache;
u_int32_t mining_cache_num_entries;
@@ -302,7 +302,9 @@ struct ndpi_detection_module_struct {
nbpf_filter nbpf_custom_proto[MAX_NBPF_CUSTOM_PROTO];
#endif
- u_int16_t max_payload_track_len;
+ u_int16_t max_payload_track_len;
+
+ ndpi_domain_classify *public_domain_suffixes;
};
diff --git a/src/lib/ndpi_domain_classify.c b/src/lib/ndpi_domain_classify.c
index 2ca071ca0..382023947 100644
--- a/src/lib/ndpi_domain_classify.c
+++ b/src/lib/ndpi_domain_classify.c
@@ -89,7 +89,7 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s,
u_int32_t i;
char *dot;
- if(!s || !domain)
+ if((!s) || (!domain))
return(false);
/* Skip initial string . in domain names */
@@ -97,18 +97,21 @@ bool ndpi_domain_classify_add(ndpi_domain_classify *s,
dot = strrchr(domain, '.');
- if(!dot) return(false);
- if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
- return(false);
-
+ if(dot) {
+ if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local")))
+ return(false);
+ }
+
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
if(s->classes[i].class_id == class_id) {
break;
} else if(s->classes[i].class_id == 0) {
s->classes[i].class_id = class_id;
s->classes[i].domains = ndpi_bitmap64_alloc();
+
if(!s->classes[i].domains)
s->classes[i].class_id = 0;
+
break;
}
}
@@ -130,7 +133,7 @@ u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
FILE *fd;
char *line;
- if(!s || !file_path)
+ if((!s) || (!file_path))
return(false);
for(i=0; i<MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS; i++) {
@@ -199,9 +202,9 @@ bool ndpi_domain_classify_finalize(ndpi_domain_classify *s) {
/* ********************************************************** */
static bool is_valid_domain_char(u_char c) {
- if(((c >= 'A')&& (c <= 'Z'))
- || ((c >= 'a')&& (c <= 'z'))
- || ((c >= '0')&& (c <= '9'))
+ if(((c >= 'A') && (c <= 'Z'))
+ || ((c >= 'a') && (c <= 'z'))
+ || ((c >= '0') && (c <= '9'))
|| (c == '_')
|| (c == '-')
|| (c == '.'))
@@ -212,35 +215,37 @@ static bool is_valid_domain_char(u_char c) {
/* ********************************************************** */
-bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
- u_int8_t *class_id /* out */,
- const char *domain) {
+const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s,
+ u_int8_t *class_id /* out */,
+ const char *hostname) {
u_int32_t i, len;
const char *dot, *elem;
- if(!domain || !s) return(false);
- if((len = strlen(domain)) == 0) return(false);
- if((dot = strrchr(domain, '.')) == NULL) return(false);
- if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(false);
+ *class_id = 0; /* Unknown class_id */
+
+ if(!hostname || !s) return(hostname);
+ if((len = strlen(hostname)) == 0) return(hostname);
+ if((dot = strrchr(hostname, '.')) == NULL) return(hostname);
+ if((!strcmp(dot, ".arpa")) || (!strcmp(dot, ".local"))) return(hostname);
/* This is a number or a numeric IP or similar */
- if(ndpi_isdigit(domain[len-1]) && isdigit(domain[0])) {
+ if(ndpi_isdigit(hostname[len-1]) && isdigit(hostname[0])) {
#ifdef DEBUG_CONTAINS
- printf("[contains] %s INVALID\n", domain);
+ printf("[contains] %s INVALID\n", hostname);
#endif
- return(false);
+ return(hostname);
}
- if(!is_valid_domain_char(domain[0])) {
+ if(!is_valid_domain_char(hostname[0])) {
#ifdef DEBUG_CONTAINS
- printf("[contains] %s INVALID\n", domain);
+ printf("[contains] %s INVALID\n", hostname);
#endif
- return(false);
+ return(hostname);
}
- elem = domain;
+ elem = hostname;
while(elem != NULL) {
u_int64_t hash = ndpi_quick_hash64(elem, strlen(elem));
@@ -249,10 +254,10 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
if(s->classes[i].class_id != 0) {
if(ndpi_bitmap64_isset(s->classes[i].domains, hash)) {
#ifdef DEBUG_CONTAINS
- printf("[contains] %s = %d\n", domain, s->classes[i].class_id);
+ printf("[contains] %s = %d\n", hostname, s->classes[i].class_id);
#endif
*class_id = s->classes[i].class_id;
- return(true);
+ return(elem);
}
} else
break;
@@ -260,16 +265,23 @@ bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
elem = strchr(elem, '.');
- if((elem == NULL) || (elem == dot))
+ if((elem == NULL) /* || (elem == dot) */)
break;
else
elem = &elem[1];
} /* while */
-
-#ifdef DEBUG_CONTAINS
- printf("[contains] %s NOT FOUND\n", domain);
-#endif
- return(false);
+ /* Not found */
+ return(hostname);
+}
+
+/* ********************************************************** */
+
+bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
+ u_int8_t *class_id /* out */,
+ const char *domain) {
+ (void)ndpi_domain_classify_longest_prefix(s, class_id, domain); /* UNUSED */
+
+ return((*class_id == 0) ? false : true);
}
diff --git a/src/lib/ndpi_domains.c b/src/lib/ndpi_domains.c
new file mode 100644
index 000000000..12f735b36
--- /dev/null
+++ b/src/lib/ndpi_domains.c
@@ -0,0 +1,93 @@
+/*
+ * ndpi_domains.c
+ *
+ * Copyright (C) 2011-24 - ntop.org and contributors
+ *
+ * nDPI is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * nDPI is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with nDPI. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "ndpi_config.h"
+#include "ndpi_api.h"
+#include "ndpi_includes.h"
+#include "ndpi_private.h"
+
+/* ******************************* */
+
+int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str,
+ char *public_suffix_list_path) {
+ char buf[256], *line;
+ FILE *fd;
+ bool do_trace = false;
+ u_int num_domains = 0;
+
+ if(public_suffix_list_path == NULL)
+ return(-1);
+
+ if((fd = fopen(public_suffix_list_path, "r")) == NULL)
+ return(-2);
+
+ if(ndpi_str->public_domain_suffixes != NULL) {
+ /* An existing suffix list was already loaded: free it and start over */
+ ndpi_domain_classify_free(ndpi_str->public_domain_suffixes);
+ }
+
+ if((ndpi_str->public_domain_suffixes = ndpi_domain_classify_alloc()) == NULL)
+ return(-3);
+
+ while((line = fgets(buf, sizeof(buf), fd)) != NULL) {
+ u_int offset, len;
+
+ /* Skip empty lines or comments */
+ if((line[0] == '\0') || (line[0] == '/') || (line[0] == '\n') || (line[0] == '\r'))
+ continue;
+
+ if((line[0] == '*') && (line[1] == '.') && (line[2] != '\0'))
+ offset = 2;
+ else
+ offset = 0;
+
+ len = strlen(line) - 1;
+ while((len > 0) && (line[len] == '\n'))
+ line[len--] = '\0';
+
+ if(!ndpi_domain_classify_add(ndpi_str->public_domain_suffixes,
+ 1 /* dummy */, &line[offset])) {
+ if(do_trace) printf("Error while processing domain %s\n", &line[offset]);
+ } else
+ num_domains++;
+ }
+
+ if(!ndpi_domain_classify_finalize(ndpi_str->public_domain_suffixes)) {
+ if(do_trace) printf("Error while finalizing domain processing\n");
+ }
+
+ if(do_trace) printf("Loaded %u domains\n", num_domains);
+
+ return(0);
+}
+
+/* ******************************* */
+
+const char* ndpi_get_host_domain_suffix(struct ndpi_detection_module_struct *ndpi_str,
+ const char *hostname) {
+ if(ndpi_str->public_domain_suffixes == NULL)
+ return(hostname);
+ else {
+ u_int8_t class_id;
+
+ return(ndpi_domain_classify_longest_prefix(ndpi_str->public_domain_suffixes,
+ &class_id, hostname));
+ }
+}
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index d870ffe5c..28c53714f 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -4000,9 +4000,14 @@ void ndpi_exit_detection_module(struct ndpi_detection_module_struct *ndpi_str) {
ndpi_free_geoip(ndpi_str);
if(ndpi_str->callback_buffer)
- ndpi_free(ndpi_str->callback_buffer);
+ ndpi_free(ndpi_str->callback_buffer);
+
if(ndpi_str->callback_buffer_tcp_payload)
- ndpi_free(ndpi_str->callback_buffer_tcp_payload);
+ ndpi_free(ndpi_str->callback_buffer_tcp_payload);
+
+ if(ndpi_str->public_domain_suffixes)
+ ndpi_domain_classify_free(ndpi_str->public_domain_suffixes);
+
ndpi_free(ndpi_str);
}