aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ivan Nardi <12729895+IvanNardi@users.noreply.github.com> 2024-07-01 08:19:35 +0200
committer GitHub <noreply@github.com> 2024-07-01 08:19:35 +0200
commit 2009623762e3a4411f47f422d2c8e12e813c67ac (patch)
tree 60bffd9d5cd2b3eed843ab740148dd3590732d2f
parent ebcea42e2b39a58c9dfa395e1992114fc0baa5ef (diff)
Add detection of OpenAI ChatGPT bots (#2481)
-rw-r--r-- src/lib/ndpi_content_match.c.inc 28
-rw-r--r-- src/lib/ndpi_main.c 3
2 files changed, 31 insertions, 0 deletions
diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc
index 5c0310c3a..92b30cb87 100644
--- a/src/lib/ndpi_content_match.c.inc
+++ b/src/lib/ndpi_content_match.c.inc
@@ -20,6 +20,34 @@
/* ****************************************************** */
+/* Hard-coded IPv4 ranges of OpenAI crawler bots; every entry carries the NDPI_HTTP_CRAWLER_BOT id. */
+static ndpi_network ndpi_http_crawler_bot_hardcoded_protocol_list[] = {
+ /* Entry layout: { IPv4 network as hex (first octet in the top byte), CIDR prefix length, id } */
+ /* GPTBot ranges, as published at https://openai.com/gptbot-ranges.txt */
+ /* TODO: can this list be autogenerated instead of hard-coded? */
+ { 0x34E69800 /* 52.230.152.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
+ { 0x34E96A00 /* 52.233.106.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
+
+ /* ChatGPT-User bot ranges, as published at https://platform.openai.com/docs/plugins/bot */
+ /* TODO: can this list be autogenerated instead of hard-coded? */
+ { 0x17628EB0 /* 23.98.142.176/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+ { 0x2854B4E0 /* 40.84.180.224/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+ { 0x0D41F0F0 /* 13.65.240.240/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+ { 0x1461BD60 /* 20.97.189.96/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+ { 0x14A14BD0 /* 20.161.75.208/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+ { 0x34E14BD0 /* 52.225.75.208/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+ { 0x349C4D90 /* 52.156.77.144/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+ { 0x2854B440 /* 40.84.180.64/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
+
+ /* End-of-list marker: all-zero entry */
+ { 0x0, 0, 0 }
+};
+/* IPv6 counterpart of the hard-coded crawler-bot list; it currently holds only the end marker. */
+static ndpi_network6 ndpi_http_crawler_bot_hardcoded_protocol_list_6[] = {
+ /* End-of-list marker; no hard-coded IPv6 ranges are listed here */
+ { NULL, 0, 0 },
+};
+
static ndpi_network host_protocol_list[] = {
/*
OCS GO (Orange Cinéma Séries)
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 1d3058d9f..f2162e005 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -3724,6 +3724,9 @@ int ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str)
if(ndpi_str->cfg.risk_crawler_bot_list_enabled) {
ndpi_init_ptree_ipv4(ndpi_str->ip_risk->v4, ndpi_http_crawler_bot_protocol_list);
ndpi_init_ptree_ipv6(ndpi_str, ndpi_str->ip_risk->v6, ndpi_http_crawler_bot_protocol_list_6);
+ /* Hard-coded lists */
+ ndpi_init_ptree_ipv4(ndpi_str->ip_risk->v4, ndpi_http_crawler_bot_hardcoded_protocol_list);
+ ndpi_init_ptree_ipv6(ndpi_str, ndpi_str->ip_risk->v6, ndpi_http_crawler_bot_hardcoded_protocol_list_6);
}
}