diff options
Diffstat (limited to 'utils/crawlers_ip_addresses_download.sh')
-rwxr-xr-x | utils/crawlers_ip_addresses_download.sh | 30 |
1 files changed, 13 insertions, 17 deletions
diff --git a/utils/crawlers_ip_addresses_download.sh b/utils/crawlers_ip_addresses_download.sh
index 77e70c61b..50b8934a3 100755
--- a/utils/crawlers_ip_addresses_download.sh
+++ b/utils/crawlers_ip_addresses_download.sh
@@ -1,8 +1,9 @@
-#!/bin/sh
+#!/usr/bin/env bash
 
 set -e
 
 cd "$(dirname "${0}")" || exit 1
+. ./common.sh || exit 1
 
 DEST=../src/lib/inc_generated/ndpi_crawlers_match.c.inc
 TMP1=/tmp/bot_google_c1.json
@@ -23,34 +24,27 @@ ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json"
 
 echo "(1) Downloading file... ${ORIGIN1}"
 http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1})
-if [ "$http_response" != "200" ]; then
-    echo "Error $http_response: you probably need to update the list url!"
-    exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP1}"
 
 echo "(1) Downloading file... ${ORIGIN2}"
 http_response=$(curl -s -o $TMP2 -w "%{http_code}" ${ORIGIN2})
-if [ "$http_response" != "200" ]; then
-    echo "Error $http_response: you probably need to update the list url!"
-    exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP2}"
 
 echo "(1) Downloading file... ${ORIGIN3}"
 http_response=$(curl -s -o $TMP3 -w "%{http_code}" ${ORIGIN3})
-if [ "$http_response" != "200" ]; then
-    echo "Error $http_response: you probably need to update the list url!"
-    exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP3}"
 
 echo "(1) Downloading file... ${ORIGIN_BING}"
 http_response=$(curl -s -o $TMP_BING -w "%{http_code}" ${ORIGIN_BING})
-if [ "$http_response" != "200" ]; then
-    echo "Error $http_response: you probably need to update the list url!"
-    exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP_BING}"
 
 echo "(1) Downloading FB crawlers routes... "
 whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB
+is_file_empty "${TMP_FB}"
 
 echo "(2) Processing IP addresses..."
 {
@@ -60,7 +54,9 @@ echo "(2) Processing IP addresses..."
     jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP_BING # TODO: ipv6
     grep -v route6 $TMP_FB | tr -d 'route:^ ' # TODO: ipv6
 } > $LIST
+is_file_empty "${LIST}"
 ./ipaddr2list.py $LIST NDPI_HTTP_CRAWLER_BOT > $DEST
+is_file_empty "${DEST}"
 rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST
 
 echo "(3) Crawlers IPs are available in $DEST"