about summary refs log tree commit diff
path: root/utils/crawlers_ip_addresses_download.sh
diff options
context:
space:
mode:
Diffstat (limited to 'utils/crawlers_ip_addresses_download.sh')
-rwxr-xr-x utils/crawlers_ip_addresses_download.sh 30
1 file changed, 13 insertions, 17 deletions
diff --git a/utils/crawlers_ip_addresses_download.sh b/utils/crawlers_ip_addresses_download.sh
index 77e70c61b..50b8934a3 100755
--- a/utils/crawlers_ip_addresses_download.sh
+++ b/utils/crawlers_ip_addresses_download.sh
@@ -1,8 +1,9 @@
-#!/bin/sh
+#!/usr/bin/env bash
set -e
cd "$(dirname "${0}")" || exit 1
+. ./common.sh || exit 1
DEST=../src/lib/inc_generated/ndpi_crawlers_match.c.inc
TMP1=/tmp/bot_google_c1.json
@@ -23,34 +24,27 @@ ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json"
echo "(1) Downloading file... ${ORIGIN1}"
http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1})
-if [ "$http_response" != "200" ]; then
- echo "Error $http_response: you probably need to update the list url!"
- exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP1}"
echo "(1) Downloading file... ${ORIGIN2}"
http_response=$(curl -s -o $TMP2 -w "%{http_code}" ${ORIGIN2})
-if [ "$http_response" != "200" ]; then
- echo "Error $http_response: you probably need to update the list url!"
- exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP2}"
echo "(1) Downloading file... ${ORIGIN3}"
http_response=$(curl -s -o $TMP3 -w "%{http_code}" ${ORIGIN3})
-if [ "$http_response" != "200" ]; then
- echo "Error $http_response: you probably need to update the list url!"
- exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP3}"
echo "(1) Downloading file... ${ORIGIN_BING}"
http_response=$(curl -s -o $TMP_BING -w "%{http_code}" ${ORIGIN_BING})
-if [ "$http_response" != "200" ]; then
- echo "Error $http_response: you probably need to update the list url!"
- exit 1
-fi
+check_http_response "${http_response}"
+is_file_empty "${TMP_BING}"
echo "(1) Downloading FB crawlers routes... "
whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB
+is_file_empty "${TMP_FB}"
echo "(2) Processing IP addresses..."
{
@@ -60,7 +54,9 @@ echo "(2) Processing IP addresses..."
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP_BING # TODO: ipv6
grep -v route6 $TMP_FB | tr -d 'route:^ ' # TODO: ipv6
} > $LIST
+is_file_empty "${LIST}"
./ipaddr2list.py $LIST NDPI_HTTP_CRAWLER_BOT > $DEST
+is_file_empty "${DEST}"
rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST
echo "(3) Crawlers IPs are available in $DEST"