aboutsummaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rwxr-xr-xutils/crawlers_ip_addresses_download.sh9
1 files changed, 7 insertions, 2 deletions
diff --git a/utils/crawlers_ip_addresses_download.sh b/utils/crawlers_ip_addresses_download.sh
index c4f4daa0b..77e70c61b 100755
--- a/utils/crawlers_ip_addresses_download.sh
+++ b/utils/crawlers_ip_addresses_download.sh
@@ -9,6 +9,7 @@ TMP1=/tmp/bot_google_c1.json
TMP2=/tmp/bot_google_c2.json
TMP3=/tmp/bot_google_c3.json
TMP_BING=/tmp/bot_bing.json
+TMP_FB=/tmp/bot_fb.list
LIST=/tmp/bot.list
#Google Common crawlers
ORIGIN1="https://developers.google.com/static/search/apis/ipranges/googlebot.json"
@@ -18,7 +19,7 @@ ORIGIN2="https://developers.google.com/static/search/apis/ipranges/special-crawl
ORIGIN3="https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers.json"
#Bing Bot
ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json"
-
+#Facebook Bot: https://developers.facebook.com/docs/sharing/webmasters/crawler/
echo "(1) Downloading file... ${ORIGIN1}"
http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1})
@@ -48,15 +49,19 @@ if [ "$http_response" != "200" ]; then
exit 1
fi
+echo "(1) Downloading FB crawlers routes... "
+whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB
+
echo "(2) Processing IP addresses..."
{
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP1 # TODO: ipv6
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP2 # TODO: ipv6
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP3 # TODO: ipv6
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP_BING # TODO: ipv6
+ grep -v route6 $TMP_FB | tr -d 'route:^ ' # TODO: ipv6
} > $LIST
./ipaddr2list.py $LIST NDPI_HTTP_CRAWLER_BOT > $DEST
-rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $LIST
+rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST
echo "(3) Crawlers IPs are available in $DEST"
exit 0