diff options
author | Luca Deri <deri@ntop.org> | 2023-08-26 17:47:51 +0200 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2023-08-26 17:55:50 +0200 |
commit | eeeee46b1e7828587d4570d754360a40350302e8 (patch) | |
tree | a91d852bac76303e8502aa0b6a120eb78cda5b52 /utils/gambling_sites_download.sh | |
parent | 4ca94369e1d54631c59719db74ac2db4ca318361 (diff) |
Changes for supporting more efficient sub-string matching
Diffstat (limited to 'utils/gambling_sites_download.sh')
-rwxr-xr-x | utils/gambling_sites_download.sh | 10 |
1 files changed, 3 insertions, 7 deletions
diff --git a/utils/gambling_sites_download.sh b/utils/gambling_sites_download.sh index f80db68f6..135e77889 100755 --- a/utils/gambling_sites_download.sh +++ b/utils/gambling_sites_download.sh @@ -5,21 +5,17 @@ set -e cd "$(dirname "${0}")" || exit 1 . ./common.sh || exit 1 -DEST=../src/lib/inc_generated/ndpi_gambling_match.c.inc -LIST=/tmp/gambling.list +LIST=../lists/gambling.list printf '(1) %s\n' "Scraping Illegal Gambling Sites (Belgium)" DOMAINS="$(curl -s 'https://www.gamingcommission.be/en/gaming-commission/illegal-games-of-chance/list-of-illegal-gambling-sites' | sed -n 's/^<td[^>]\+>\(.\+\.[a-zA-Z0-9]\+\)\(\|\/.*[^<]*\)<\/td>/\1/gp' || exit 1)" is_str_empty "${DOMAINS}" "Please check gambling sites URL and sed REGEX." -printf '(1) %s\n' "Downloading Gambling Sites (Poland)" +printf '(2) %s\n' "Downloading Gambling Sites (Poland)" DOMAINS_PL="$(curl -s https://hazard.mf.gov.pl/api/Register | xmllint --xpath "/*[local-name(.)='Rejestr']/*[local-name(.)='PozycjaRejestru']/*[local-name(.)='AdresDomeny']/text()" -)" is_str_empty "${DOMAINS_PL}" "Please check gambling sites URL and XPath." -printf '(2) %s\n' "Processing IP addresses..." echo "${DOMAINS}" "${DOMAINS_PL}" | sort | uniq >${LIST} -./hostname2list.py "${LIST}" "Gambling" NDPI_PROTOCOL_GAMBLING NDPI_PROTOCOL_CATEGORY_WEB NDPI_PROTOCOL_UNSAFE >${DEST} -rm -f "${LIST}" -is_file_empty "${DEST}" +printf '(3) %s\n' "List ${LIST} is now ready" exit 0 |