aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
author Luca Deri <lucaderi@users.noreply.github.com> 2024-04-18 23:21:40 +0200
committer GitHub <noreply@github.com> 2024-04-18 23:21:40 +0200
commit ad117bfaabd3bc75dc70d0ddbc4ba18c86c40dbd (patch)
tree 3b1fb6016da1e114bca190ed6a868421fd9c32f1 /src/include
parent 108b8331d5b345e110c9ef110a6aa95a2767a640 (diff)
Domain Classification Improvements (#2396)
* Added size_t ndpi_compress_str(const char * in, size_t len, char * out, size_t bufsize); size_t ndpi_decompress_str(const char * in, size_t len, char * out, size_t bufsize); used to compress short strings such as domain names. This code is based on https://github.com/Ed-von-Schleck/shoco
* Major code rewrite for ndpi_hash and ndpi_domain_classify
* Improvements to make sure custom categories are loaded and enabled
* Fixed string encoding
* Extended SalesForce/Cloudflare domains list
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/ndpi_api.h 60
-rw-r--r-- src/include/ndpi_private.h 2
-rw-r--r-- src/include/ndpi_typedefs.h 11
3 files changed, 39 insertions, 34 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 34617c535..acc01fb0f 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -1931,11 +1931,9 @@ extern "C" {
* Free the hashmap.
*
* @par h = pointer to the hash map [in, out]
- * @par cleanup_func = pointer to a optional callback function
- * called for each element in the hashmap [in]
*
*/
- void ndpi_hash_free(ndpi_str_hash **h, void (*cleanup_func)(ndpi_str_hash *h));
+ void ndpi_hash_free(ndpi_str_hash **h);
/**
* Search for an entry in the hashmap.
@@ -1949,7 +1947,7 @@ extern "C" {
* @return 0 if an entry with that key was found, 1 otherwise
*
*/
- int ndpi_hash_find_entry(ndpi_str_hash *h, char *key, u_int key_len, void **value);
+ int ndpi_hash_find_entry(ndpi_str_hash *h, char *key, u_int key_len, u_int16_t *value);
/**
* Add an entry to the hashmap.
@@ -1957,12 +1955,12 @@ extern "C" {
* @par h = pointer to the hash map [in, out]
* @par key = character string (no '\0' required) [in]
* @par key_len = length of the character string @key [in]
- * @par value = pointer to the value to add [in]
+ * @par value = value to add [in]
*
* @return 0 if the entry was added, 1 otherwise
*
*/
- int ndpi_hash_add_entry(ndpi_str_hash **h, char *key, u_int8_t key_len, void *value);
+ int ndpi_hash_add_entry(ndpi_str_hash **h, char *key, u_int8_t key_len, u_int16_t value);
/* ******************************* */
@@ -2076,23 +2074,21 @@ extern "C" {
for substring domain matching and classification
*/
- ndpi_domain_classify* ndpi_domain_classify_alloc(void);
- void ndpi_domain_classify_free(ndpi_domain_classify *s);
- u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s);
- bool ndpi_domain_classify_add(ndpi_domain_classify *s,
- u_int8_t class_id, const char *domain);
- u_int32_t ndpi_domain_classify_add_domains(ndpi_domain_classify *s,
- u_int8_t class_id,
- char *file_path);
- bool ndpi_domain_classify_finalize(ndpi_domain_classify *s);
- const char* ndpi_domain_classify_longest_prefix(ndpi_domain_classify *s,
- u_int8_t *class_id /* out */,
- const char *hostnname,
- bool return_subprefix);
- bool ndpi_domain_classify_contains(ndpi_domain_classify *s,
- u_int8_t *class_id /* out */,
- const char *domain);
-
+ ndpi_domain_classify* ndpi_domain_classify_alloc();
+ void ndpi_domain_classify_free(ndpi_domain_classify *s);
+ u_int32_t ndpi_domain_classify_size(ndpi_domain_classify *s);
+ bool ndpi_domain_classify_add(struct ndpi_detection_module_struct *ndpi_mod,
+ ndpi_domain_classify *s,
+ u_int16_t class_id, char *domain);
+ u_int32_t ndpi_domain_classify_add_domains(struct ndpi_detection_module_struct *ndpi_mod,
+ ndpi_domain_classify *s,
+ u_int16_t class_id,
+ char *file_path);
+ bool ndpi_domain_classify_hostname(struct ndpi_detection_module_struct *ndpi_mod,
+ ndpi_domain_classify *s,
+ u_int16_t *class_id /* out */,
+ char *hostname);
+
/* ******************************* */
/*
@@ -2160,12 +2156,14 @@ extern "C" {
*
* @par ndpi_str = the struct created for the protocol detection
* @par hostname = the hostname from which the domain name has to be extracted
+ * @par suffix_id = the id of the returned domain
*
* @return The host domain name suffix or the host itself if not found.
*
*/
const char* ndpi_get_host_domain_suffix(struct ndpi_detection_module_struct *ndpi_str,
- const char *hostname);
+ const char *hostname,
+ u_int16_t *suffix_id /* out */);
/**
* Returns the domain (including the TLD) suffix out of the specified hostname.
@@ -2217,6 +2215,20 @@ extern "C" {
/* ******************************* */
+ size_t ndpi_compress_str(const char * in, size_t len, char * out, size_t bufsize);
+ size_t ndpi_decompress_str(const char * in, size_t len, char * out, size_t bufsize);
+
+ /* ******************************* */
+
+ /* NOTE
+ this function works best if you have loaded in memory domain
+ suffixes using ndpi_load_domain_suffixes()
+ */
+ u_int ndpi_encode_domain(struct ndpi_detection_module_struct *ndpi_str,
+ char *domain, char *out, u_int out_len);
+
+ /* ******************************* */
+
const char *ndpi_lru_cache_idx_to_name(lru_cache_type idx);
/**
diff --git a/src/include/ndpi_private.h b/src/include/ndpi_private.h
index ee6302626..ece904278 100644
--- a/src/include/ndpi_private.h
+++ b/src/include/ndpi_private.h
@@ -400,7 +400,7 @@ struct ndpi_detection_module_struct {
u_int16_t max_payload_track_len;
- ndpi_domain_classify *public_domain_suffixes;
+ ndpi_str_hash *public_domain_suffixes;
};
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 9dc3fdc98..86c7df8ab 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -1127,11 +1127,7 @@ typedef struct _ndpi_automa {
struct ndpi_automa_stats stats;
} ndpi_automa;
-typedef struct ndpi_str_hash {
- unsigned int hash;
- void *value;
- // u_int8_t private_data[1]; /* Avoid error C2466 and do not initiate private data with 0 */
-} ndpi_str_hash;
+typedef void ndpi_str_hash;
typedef struct ndpi_proto {
/*
@@ -1164,10 +1160,7 @@ typedef struct {
#define MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS 16
typedef struct {
- struct {
- u_int16_t class_id;
- ndpi_bitmap64_fuse *domains;
- } classes[MAX_NUM_NDPI_DOMAIN_CLASSIFICATIONS];
+ ndpi_str_hash *domains;
} ndpi_domain_classify;
typedef enum {