aboutsummaryrefslogtreecommitdiff
path: root/example/ndpiReader.c
diff options
context:
space:
mode:
authorIvan Nardi <12729895+IvanNardi@users.noreply.github.com>2024-03-06 19:25:59 +0100
committerGitHub <noreply@github.com>2024-03-06 19:25:59 +0100
commit21da53d3a03cad32dffa8447d9c4ae5bae62a3a2 (patch)
tree6ed55bf40dfcf2976df48933b38e6f014a3fe852 /example/ndpiReader.c
parent8f63a1173539a79c1fa1bb5c618fe561175a1ab5 (diff)
ahocorasick: improve matching with subdomains (#2331)
The basic idea is to have the following logic: * pattern "DOMAIN" matches the domain itself (i.e. exact match) *and* any subdomains (i.e. "ANYTHING.DOMAIN") * pattern "DOMAIN." matches *also* any strings for which it is a prefix [please note that this kind of match is handy but quite dangerous...] * pattern "-DOMAIN" matches *also* any strings for which it is a postfix Examples: * pattern "wikipedia.it": * "wikipedia.it" -> OK * "foo.wikipedia.it" -> OK * "foowikipedia.it" -> NO MATCH * "wikipedia.it.com" -> NO MATCH * pattern "wikipedia.": * "wikipedia.it" -> OK * "foo.wikipedia.it" -> OK * "foowikipedia.it" -> NO MATCH * "wikipedia.it.com" -> OK * pattern "-wikipedia.it": * "wikipedia.it" -> NO MATCH * "foo.wikipedia.it" -> NO MATCH * "0001-wikipedia.it" -> OK * "foo.0001-wikipedia.it" -> OK Bottom line: * exact match * prefix with "." (always, implicit) * prefix with "-" (only if explicitly set) * postfix with "." (only if explicitly set) That means that the patterns cannot start with '.' anymore. Close #2330
Diffstat (limited to 'example/ndpiReader.c')
-rw-r--r--example/ndpiReader.c43
1 files changed, 43 insertions, 0 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 361cc36a7..ef5307934 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -5005,6 +5005,48 @@ void automataUnitTest() {
ndpi_free_automa(automa);
}
+/* *********************************************** */
+
+void automataDomainsUnitTest() { /* Unit test: domain-style matching on an automa, one pattern per automa, three pattern shapes */
+ void *automa = ndpi_init_automa_domain(); /* automa created through the domain-specific initializer */
+
+ assert(automa);
+ assert(ndpi_add_string_to_automa(automa, ndpi_strdup("wikipedia.it")) == 0); /* Case 1: plain "DOMAIN" pattern. NOTE(review): the insertion is inside assert(); if this is the standard macro it is skipped under NDEBUG - confirm the build never defines it */
+ ndpi_finalize_automa(automa); /* finalize after adding patterns, before any lookup */
+ assert(ndpi_match_string(automa, "wikipedia.it") == 1); /* exact match */
+ assert(ndpi_match_string(automa, "foo.wikipedia.it") == 1); /* subdomain: "ANYTHING.DOMAIN" */
+ assert(ndpi_match_string(automa, "foowikipedia.it") == 0); /* no '.' separating the extra prefix from the pattern */
+ assert(ndpi_match_string(automa, "foowikipedia") == 0); /* pattern is not fully contained in the input */
+ assert(ndpi_match_string(automa, "-wikipedia.it") == 0); /* '-' boundary is honoured only when the pattern itself starts with '-' */
+ assert(ndpi_match_string(automa, "foo-wikipedia.it") == 0); /* same as above, with text before the '-' */
+ assert(ndpi_match_string(automa, "wikipedia.it.com") == 0); /* pattern has no trailing '.', so it is not matched as a prefix */
+ ndpi_free_automa(automa);
+
+ automa = ndpi_init_automa_domain(); /* fresh automa so the previous pattern cannot influence this case */
+ assert(automa);
+ assert(ndpi_add_string_to_automa(automa, ndpi_strdup("wikipedia.")) == 0); /* Case 2: "DOMAIN." pattern (trailing dot) also matches as a prefix */
+ ndpi_finalize_automa(automa);
+ assert(ndpi_match_string(automa, "wikipedia.it") == 1); /* pattern is a prefix of the input */
+ assert(ndpi_match_string(automa, "foo.wikipedia.it") == 1); /* prefix match combined with the implicit '.' subdomain rule */
+ assert(ndpi_match_string(automa, "foowikipedia.it") == 0); /* no '.' separating the extra prefix from the pattern */
+ assert(ndpi_match_string(automa, "foowikipedia") == 0); /* input lacks the pattern's trailing '.' and the '.' boundary */
+ assert(ndpi_match_string(automa, "-wikipedia.it") == 0); /* '-' boundary not requested by this pattern */
+ assert(ndpi_match_string(automa, "foo-wikipedia.it") == 0); /* same as above, with text before the '-' */
+ assert(ndpi_match_string(automa, "wikipediafoo") == 0); /* the pattern's trailing '.' is absent from the input */
+ assert(ndpi_match_string(automa, "wikipedia.it.com") == 1); /* unlike case 1, extra trailing labels are accepted */
+ ndpi_free_automa(automa);
+
+ automa = ndpi_init_automa_domain(); /* fresh automa for the third pattern shape */
+ assert(automa);
+ assert(ndpi_add_string_to_automa(automa, ndpi_strdup("-buy.itunes.apple.com")) == 0); /* Case 3: "-DOMAIN" pattern (leading dash) matches as a postfix */
+ ndpi_finalize_automa(automa);
+ assert(ndpi_match_string(automa, "buy.itunes.apple.com") == 0); /* the leading '-' is part of the pattern and is missing here */
+ assert(ndpi_match_string(automa, "p53-buy.itunes.apple.com") == 1); /* input ends with the pattern, '-' included */
+ assert(ndpi_match_string(automa, "p53buy.itunes.apple.com") == 0); /* no '-' between "p53" and "buy" */
+ assert(ndpi_match_string(automa, "foo.p53-buy.itunes.apple.com") == 1); /* postfix match with an additional leading label */
+ ndpi_free_automa(automa);
+}
+
#endif
/* *********************************************** */
@@ -5927,6 +5969,7 @@ int main(int argc, char **argv) {
bitmapUnitTest();
filterUnitTest();
automataUnitTest();
+ automataDomainsUnitTest();
analyzeUnitTest();
ndpi_self_check_host_match(stderr);
analysisUnitTest();