aboutsummaryrefslogtreecommitdiff
path: root/utils/microsoft_domains_download.sh
blob: 259ef3d0de64398bc0be4f7e3c4e9c694a3c102f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/sh

set -e

cd "$(dirname "${0}")" || exit 1

DEST_OUTLOOK=../src/lib/inc_generated/ndpi_domains_ms_outlook_match.c.inc
DEST_MSTEAMS=../src/lib/inc_generated/ndpi_domains_ms_teams_match.c.inc
DEST_ONEDRIVE=../src/lib/inc_generated/ndpi_domains_ms_onedrive_match.c.inc
DEST_OFFICE365=../src/lib/inc_generated/ndpi_domains_ms_office365_match.c.inc
DEST_AZURE=../src/lib/inc_generated/ndpi_domains_ms_azure_match.c.inc
TMP=/tmp/ms.json
TMP_AZURE=/tmp/azure.html
LIST=/tmp/ms.list
# https://docs.microsoft.com/en-us/microsoft-365/enterprise/urls-and-ip-address-ranges?view=o365-worldwide
ORIGIN="https://endpoints.office.com/endpoints/worldwide?clientrequestid=b10c5ed1-bad1-445f-b386-b919946339a7"
ORIGIN_AZURE="https://learn.microsoft.com/en-us/azure/security/fundamentals/azure-domains"

echo "(1) Downloading file... ${ORIGIN}"
http_response=$(curl -s -o $TMP -w "%{http_code}" ${ORIGIN})
if [ $http_response != "200" ]; then
    echo "Error $http_response: you probably need to update the list url!"
    exit 1
fi

echo "(1) Downloading file... ${ORIGIN_AZURE}"
http_response=$(curl -s -o $TMP_AZURE -w "%{http_code}" ${ORIGIN_AZURE})
if [ $http_response != "200" ]; then
    echo "Error $http_response: you probably need to update the list url!"
    exit 1
fi

echo "(2) Processing domains..."

#OUTLOOK
jq -r '.[] | select(.serviceArea=="Exchange") | .urls[]?' < $TMP | sed 's/^\*\.//' | sort -u | uniq > $LIST
./domains2list.py $LIST "Outlook" NDPI_PROTOCOL_MS_OUTLOOK NDPI_PROTOCOL_CATEGORY_MAIL NDPI_PROTOCOL_ACCEPTABLE > $DEST_OUTLOOK

#SKYPE/TEAMS
jq -r '.[] | select(.serviceArea=="Skype") | .urls[]?' < $TMP | sed 's/^\*\.//' | sort -u | uniq > $LIST
./domains2list.py $LIST "Teams" NDPI_PROTOCOL_MSTEAMS NDPI_PROTOCOL_CATEGORY_COLLABORATIVE NDPI_PROTOCOL_ACCEPTABLE > $DEST_MSTEAMS

#ONEDRIVE
jq -r '.[] | select(.serviceArea=="SharePoint") | .urls[]?' < $TMP | sed 's/^\*\.//' | sort -u | uniq > $LIST
./domains2list.py $LIST "MS_OneDrive" NDPI_PROTOCOL_MS_ONE_DRIVE NDPI_PROTOCOL_CATEGORY_CLOUD NDPI_PROTOCOL_ACCEPTABLE > $DEST_ONEDRIVE

#OFFICE
# On .id 125 there are mainly some generic domains for certificate management like digicert, verisign. Specific
# microsoft domains are classified via the generic "catch-all" rules in ndpi_content_match.c.in
#
# Skip also the three generic domains (on .id 50 and 78): *.microsoft.com, *.msocdn.com, *.onmicrosoft.com
# As reported in the web page:
#  "Some Office 365 features require endpoints within these domains (including CDNs). Many specific
#   FQDNs within these wildcards have been published recently as we work to either remove or better
#   explain our guidance relating to these wildcards"
#
# We will handle them via the generic "catch-all" rules as well
#
jq -r '.[] | select(.serviceArea=="Common" and .id != 125 and .id != 50 and .id != 78) | .urls[]?' < $TMP | sed -n '/\*\.\.com/!p' | sed 's/^\*\.//' | sort -u | uniq > $LIST
./domains2list.py $LIST "Microsoft365" NDPI_PROTOCOL_MICROSOFT_365 NDPI_PROTOCOL_CATEGORY_COLLABORATIVE NDPI_PROTOCOL_ACCEPTABLE > $DEST_OFFICE365

#AZURE
#Skip *.onmicrosoft.com; see above
cat $TMP_AZURE | sed -ne 's/<td>\*.\(.*\)<\/td>/\1/gp' | sed 's/\/ /\n/g' | sed -e '/\*\.onmicrosoft\.com/d' | sed 's/^\*\.//' | sort -u | uniq > $LIST
./domains2list.py $LIST "Azure" NDPI_PROTOCOL_MICROSOFT_AZURE NDPI_PROTOCOL_CATEGORY_CLOUD NDPI_PROTOCOL_ACCEPTABLE > $DEST_AZURE

rm -f $TMP $TMP_AZURE $LIST

echo "(3) Microsoft domains are available in $DEST_OUTLOOK, $DEST_MSTEAMS, $DEST_ONEDRIVE, $DEST_OFFICE365, $DEST_AZURE"
exit 0