about | summary | refs | log | tree | commit | diff
path: root/utils/gambling_sites_download.sh
diff options
context:
space:
mode:
author: snicket2100 <57048005+snicket2100@users.noreply.github.com> 2023-07-14 09:55:46 +0200
committer: GitHub <noreply@github.com> 2023-07-14 09:55:46 +0200
commit: abee1a2a6f1d8375831901e49ace85eaea0650e3 (patch)
tree: 275ca11c158ccb6b73f112da45df6a934f811606 /utils/gambling_sites_download.sh
parent: 0dd1ee5656e94e42e919d88d2d783b7e53e246a5 (diff)
Included Gambling website data from the Polish `hazard.mf.gov.pl` list (#2041)
* Refreshed the Belgium Gambling Site list data. Unfortunately, some hostnames have been removed from that list, which means they are disappearing from the `ndpi_gambling_match.c.inc` file as well.
* build: added `libxml2-utils` (for `xmllint`).
* Included Gambling website data from the Polish `hazard.mf.gov.pl` list. The list contains over 30k gambling website hostnames as of today.
Diffstat (limited to 'utils/gambling_sites_download.sh')
-rwxr-xr-x utils/gambling_sites_download.sh 6
1 file changed, 5 insertions, 1 deletion
diff --git a/utils/gambling_sites_download.sh b/utils/gambling_sites_download.sh
index 3340cf237..f80db68f6 100755
--- a/utils/gambling_sites_download.sh
+++ b/utils/gambling_sites_download.sh
@@ -12,8 +12,12 @@ printf '(1) %s\n' "Scraping Illegal Gambling Sites (Belgium)"
DOMAINS="$(curl -s 'https://www.gamingcommission.be/en/gaming-commission/illegal-games-of-chance/list-of-illegal-gambling-sites' | sed -n 's/^<td[^>]\+>\(.\+\.[a-zA-Z0-9]\+\)\(\|\/.*[^<]*\)<\/td>/\1/gp' || exit 1)"
is_str_empty "${DOMAINS}" "Please check gambling sites URL and sed REGEX."
+printf '(1) %s\n' "Downloading Gambling Sites (Poland)"
+DOMAINS_PL="$(curl -s https://hazard.mf.gov.pl/api/Register | xmllint --xpath "/*[local-name(.)='Rejestr']/*[local-name(.)='PozycjaRejestru']/*[local-name(.)='AdresDomeny']/text()" -)" # select the AdresDomeny text of every Rejestr/PozycjaRejestru entry; local-name() matches the elements regardless of XML namespace
+is_str_empty "${DOMAINS_PL}" "Please check gambling sites URL and XPath."
+
printf '(2) %s\n' "Processing IP addresses..."
-echo "${DOMAINS}" >${LIST}
+printf '%s\n' "${DOMAINS}" "${DOMAINS_PL}" | sort | uniq >"${LIST}" # printf repeats '%s\n' per argument, so each list ends with a newline; echo would join the two lists with a space and fuse the last Belgian and first Polish hostname onto one line
./hostname2list.py "${LIST}" "Gambling" NDPI_PROTOCOL_GAMBLING NDPI_PROTOCOL_CATEGORY_WEB NDPI_PROTOCOL_UNSAFE >${DEST}
rm -f "${LIST}"
is_file_empty "${DEST}"