From b503900b1456e8bd4b60d1deb0ef7cc3665676f1 Mon Sep 17 00:00:00 2001 From: Toni Uhlig Date: Sun, 4 Apr 2021 21:59:14 +0200 Subject: First step of cleaning up the Makefile.in hell. The goal is to get rid of the Makefile.in's (replace them with Automake Makefile.am's) as they duplicate lots of text. That decreases readability and is in general a bad design pattern. It seems appropriate to use Automake for an Autoconf based project. Currently achieved: * using libtool to build the core library (+libtool's semantic versioning) * out-of-source builds should work right now * introducing Automake based Makefile in src/ * removed some (maybe) unused GIT ignored files * provide some small python fixes Signed-off-by: Toni Uhlig --- tests/do-dga.sh.in | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100755 tests/do-dga.sh.in (limited to 'tests/do-dga.sh.in') diff --git a/tests/do-dga.sh.in b/tests/do-dga.sh.in new file mode 100755 index 000000000..4c5bd8766 --- /dev/null +++ b/tests/do-dga.sh.in @@ -0,0 +1,65 @@ +#!/bin/sh + +cd "$(dirname "${0}")" + +# Baseline performances ------------------------------------------------------------------------------------------------ +# Important notes: BASE values must be integers and represent percentages (e.g. 79%, 98%).
# Minimum acceptable metrics, expressed as integer percentages (69 means 69%).
BASE_ACCURACY=69
BASE_PRECISION=89
BASE_RECALL=41
# ----------------------------------------------------------------------------------------------------------------------

# The @srcdir@ placeholders are expected to be filled in when this .in template is processed.
DGA_EVALUATE="dga/dga_evaluate"
DGA_DATA="@srcdir@/dga/test_dga.csv"
NON_DGA_DATA="@srcdir@/dga/test_non_dga.csv"
DGA_DATA_SIZE=0
NON_DGA_DATA_SIZE=0
DATA_SIZE=0
RC=0

# Print an error message on stderr and abort the whole test with a failing status.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Succeed only when $1 is a non-empty string made of decimal digits.
is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Print the number of lines of file $1 (as counted by wc -l); fail if it is unreadable.
count_lines() {
  [ -r "$1" ] || return 1
  # Reading from stdin keeps the file name out of the output; tr drops the
  # padding some wc implementations add around the number.
  wc -l < "$1" | tr -d '[:space:]'
}

# Print floor(100 * $1 / $2) using integer arithmetic only.
# An undefined ratio ($2 == 0) is reported as 0 so that it fails the baseline
# comparison instead of silently slipping through.
percent() {
  if [ "$2" -eq 0 ]; then
    echo 0
  else
    echo "$(( $1 * 100 / $2 ))"
  fi
}

# Print the integer percentage $1 as a two-decimal ratio: 69 -> 0.69, 5 -> 0.05, 100 -> 1.00.
format_ratio() {
  printf '%d.%02d' "$(( $1 / 100 ))" "$(( $1 % 100 ))"
}

# Compare metric value $2 against baseline $3; on regression report it under
# the name $1 and mark the run as failed (RC=1).
check_metric() {
  if [ "$2" -lt "$3" ]; then
    printf 'ERROR: Your modifications decreased DGA classifier %s: %s decreased to %s!\n' \
      "$1" "$(format_ratio "$3")" "$(format_ratio "$2")"
    RC=1
  fi
}

# Count the samples in both data sets.
# Sets DGA_DATA_SIZE, NON_DGA_DATA_SIZE and DATA_SIZE; aborts if a data file cannot be read.
get_evaluation_data_size() {
  DGA_DATA_SIZE=$(count_lines "${DGA_DATA}") || die "cannot read ${DGA_DATA}"
  NON_DGA_DATA_SIZE=$(count_lines "${NON_DGA_DATA}") || die "cannot read ${NON_DGA_DATA}"
  DATA_SIZE=$(( NON_DGA_DATA_SIZE + DGA_DATA_SIZE ))
}

# Run the evaluator on both data sets, derive accuracy/precision/recall and
# compare them with the BASE_* values. Sets RC=1 on any regression and prints
# a short report on stdout.
evaluate_ndpi_dga_detection() {
  # DGA detection is a binary classification problem. We evaluate the following metrics:
  #   Accuracy:  (TP + TN) / (TP + TN + FN + FP)
  #   Precision: TP / (TP + FP)
  #   Recall:    TP / (TP + FN)

  # The evaluator's output is used as a detection count; anything that is not a
  # plain non-negative integer (including no output at all) is a hard error.
  TP=$("${DGA_EVALUATE}" "${DGA_DATA}")
  is_uint "${TP}" || die "${DGA_EVALUATE} ${DGA_DATA} did not print a count (got '${TP}')"
  FP=$("${DGA_EVALUATE}" "${NON_DGA_DATA}")
  is_uint "${FP}" || die "${DGA_EVALUATE} ${NON_DGA_DATA} did not print a count (got '${FP}')"

  FN=$(( DGA_DATA_SIZE - TP ))
  TN=$(( NON_DGA_DATA_SIZE - FP ))

  # Integer arithmetic is exact; a floating point int((a/b)*100) can truncate
  # one percent too low (e.g. 29/100 yields 28).
  ACCURACY=$(percent "$(( TP + TN ))" "$(( TP + TN + FP + FN ))")
  PRECISION=$(percent "${TP}" "$(( TP + FP ))")
  RECALL=$(percent "${TP}" "$(( TP + FN ))")

  # If a modified classification algorithm lowers any metric, the test does not pass.
  check_metric "accuracy" "${ACCURACY}" "${BASE_ACCURACY}"
  check_metric "precision" "${PRECISION}" "${BASE_PRECISION}"
  check_metric "recall" "${RECALL}" "${BASE_RECALL}"

  # Finally print the current performance; raise the BASE_* values if you improved it.
  echo "DGA detection performances report:"
  echo "Accuracy=$(format_ratio "${ACCURACY}")"
  echo "Precision=$(format_ratio "${PRECISION}")"
  echo "Recall=$(format_ratio "${RECALL}")"
}

get_evaluation_data_size
evaluate_ndpi_dga_detection

exit "${RC}"