From 291acb94591da6fd9abf7c56177e61d35726f8f6 Mon Sep 17 00:00:00 2001 From: Simone Mainardi Date: Fri, 17 Aug 2018 13:13:26 +0200 Subject: Hyperscan regex for domains such as .com.cn or .co.uk --- src/lib/ndpi_content_match.c.inc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc index 2ba38dee1..60bc3498f 100644 --- a/src/lib/ndpi_content_match.c.inc +++ b/src/lib/ndpi_content_match.c.inc @@ -8015,12 +8015,15 @@ Each part of a domain name can be no longer than 63 characters. There are no sin https://www.regular-expressions.info/email.html -We also add an optional .co to match domains such as .co.uk - (?i) is to make searches case insensitive + +(?:) is a non-capturing group used to allow patterns such as .co.uk or .com.cn + The non-capturing group is used in a nested fashion to match the + .co, and then, optionally, a trailing m. + */ -#define TLD "(?i)(\\.co){0,1}\\.[a-z]{2,63}$" +#define TLD "(?i)(?:\\.co(?:m)?)?\\.[a-z]{2,63}$" /* ****************************************************** */ -- cgit v1.2.3