#!/usr/bin/env bash cd "$(dirname "${0}")" || exit 1 # Baseline performances ------------------------------------------------------------------------------------------------ # Important notes: BASE values must be integers examples and represents percentage (e.g. 79%, 98%). BASE_ACCURACY=69 BASE_PRECISION=89 BASE_RECALL=40 # ---------------------------------------------------------------------------------------------------------------------- DGA_EVALUATE="./dga/dga_evaluate" DGA_DATA="dga/test_dga.csv" NON_DGA_DATA="dga/test_non_dga.csv" DGA_DATA_SIZE=0 NON_DGA_DATA_SIZE=0 DATA_SIZE=0 RC=0 get_evaluation_data_size() { DGA_DATA_SIZE=$(wc -l dga/test_dga.csv | awk '{split($0,a," "); print a[1]}') NON_DGA_DATA_SIZE=$(wc -l dga/test_non_dga.csv | awk '{split($0,a," "); print a[1]}') DATA_SIZE=$(( NON_DGA_DATA_SIZE + DGA_DATA_SIZE )) } evaluate_ndpi_dga_detection() { # DGA detection is a binary classification problem, We evaluate the following metrics: # Accuracy: (TP + TN) / (TP + TN + FN + FP) # Precision: TP / (TP + FP) # Recall: TP / (TP + FN) TP=$($DGA_EVALUATE dga/test_dga.csv) FN=$(( DGA_DATA_SIZE - TP )) FP=$($DGA_EVALUATE dga/test_non_dga.csv) TN=$(( NON_DGA_DATA_SIZE - FP )) ACCURACY=$(echo "print(int(((${TP} + ${TN})/(${TP} + ${TN} + ${FP} + ${FN}))*100))" | python3) PRECISION=$(echo "print(int(((${TP})/(${TP} + ${FP}))*100))" | python3) RECALL=$(echo "print(int(((${TP})/(${TP} + ${FN}))*100))" | python3) # In case modified version of classification algorithm decreases performances, test do not pass. if [ "$ACCURACY" -lt "$BASE_ACCURACY" ]; then printf "ERROR: Your modifications decreased DGA classifier accuracy: 0.${BASE_ACCURACY} decreased to 0.${ACCURACY}!\n" RC=1 fi if [ "$PRECISION" -lt "$BASE_PRECISION" ]; then printf "ERROR: Your modifications decreased DGA classifier precision: 0.${BASE_PRECISION} decreased to 0.${PRECISION}!\n" RC=1 fi if [ "$RECALL" -lt "$BASE_RECALL" ]; then printf "ERROR: Your modifications decreased DGA classifier recall: 0.${BASE_RECALL} decreased to 0.${RECALL}!\n" RC=1 fi # Finally we print the current performances, upgrade BASE_ metrics in case you improved it. echo "DGA detection performances report:" echo "Accuracy=0.$ACCURACY" echo "Precision=0.$PRECISION" echo "Recall=0.$RECALL" } get_evaluation_data_size evaluate_ndpi_dga_detection exit $RC