aboutsummaryrefslogtreecommitdiff
path: root/tests/do-dga.sh.in
diff options
context:
space:
mode:
authorToni Uhlig <matzeton@googlemail.com>2021-04-04 21:59:14 +0200
committerToni Uhlig <matzeton@googlemail.com>2021-07-30 22:09:21 +0200
commitb503900b1456e8bd4b60d1deb0ef7cc3665676f1 (patch)
tree92e7a88c190caf23f810e0a828b6da926f11879a /tests/do-dga.sh.in
parent13c5d6801e3b9fdc71bec88c300243a939346a8d (diff)
First step of cleaning up the Makefile.in hell.first-step-to-automake-integration
The goal is to get rid of the Makefile.in's (replace them with Automake Makefile.am's) as they duplicate lots of text. That decreases readability and is in general a bad design pattern. It seems appropriate to use Automake for an Autoconf based project. Currently achieved: * using libtool to build the core library (+libtool's semantic versioning) * out-of-source builds should work right now * introducing Automake based Makefile in src/ * removed some (maybe) unused GIT ignored files * provide some small python fixes Signed-off-by: Toni Uhlig <matzeton@googlemail.com>
Diffstat (limited to 'tests/do-dga.sh.in')
-rwxr-xr-xtests/do-dga.sh.in65
1 files changed, 65 insertions, 0 deletions
diff --git a/tests/do-dga.sh.in b/tests/do-dga.sh.in
new file mode 100755
index 000000000..4c5bd8766
--- /dev/null
+++ b/tests/do-dga.sh.in
@@ -0,0 +1,65 @@
+#!/bin/sh
+# Evaluates the DGA classifier on the CSV data sets; exits non-zero if a metric drops below its baseline.
+cd "$(dirname "${0}")" || exit 1 # the relative DGA_EVALUATE path below depends on this directory
+
+# Baseline performances ------------------------------------------------------------------------------------------------
+# Important notes: BASE values must be integers and represent percentages (e.g. 79 means 79%, 98 means 98%).
+BASE_ACCURACY=69
+BASE_PRECISION=89
+BASE_RECALL=41
+# ----------------------------------------------------------------------------------------------------------------------
+
+DGA_EVALUATE="dga/dga_evaluate"
+DGA_DATA="@srcdir@/dga/test_dga.csv"
+NON_DGA_DATA="@srcdir@/dga/test_non_dga.csv"
+DGA_DATA_SIZE=0
+NON_DGA_DATA_SIZE=0
+DATA_SIZE=0
+RC=0
+
+get_evaluation_data_size() { # Sets the *_SIZE globals from the line counts of the two CSV data sets.
+ DGA_DATA_SIZE=$(wc -l < "${DGA_DATA}" | awk '{print $1}')
+ NON_DGA_DATA_SIZE=$(wc -l < "${NON_DGA_DATA}" | awk '{print $1}')
+ DATA_SIZE=$(( NON_DGA_DATA_SIZE + DGA_DATA_SIZE ))
+}
+
+evaluate_ndpi_dga_detection() {
+ # DGA detection is a binary classification problem, We evaluate the following metrics:
+ # Accuracy: (TP + TN) / (TP + TN + FN + FP)
+ # Precision: TP / (TP + FP)
+ # Recall: TP / (TP + FN)
+
+ TP=`$DGA_EVALUATE ${DGA_DATA}`
+ FN=$(( $DGA_DATA_SIZE - $TP ))
+ FP=`$DGA_EVALUATE ${NON_DGA_DATA}`
+ TN=$(( $NON_DGA_DATA_SIZE - $FP ))
+
+ ACCURACY=`echo "print(int(((${TP} + ${TN})/(${TP} + ${TN} + ${FP} + ${FN}))*100))" | python3`
+ PRECISION=`echo "print(int(((${TP})/(${TP} + ${FP}))*100))" | python3`
+ RECALL=`echo "print(int(((${TP})/(${TP} + ${FN}))*100))" | python3`
+
+ # In case modified version of classification algorithm decreases performances, test do not pass.
+ if [ $ACCURACY -lt $BASE_ACCURACY ]; then
+ printf "ERROR: Your modifications decreased DGA classifier accuracy: 0.${BASE_ACCURACY} decreased to 0.${ACCURACY}!\n"
+ RC=1
+ fi
+ if [ $PRECISION -lt $BASE_PRECISION ]; then
+ printf "ERROR: Your modifications decreased DGA classifier precision: 0.${BASE_PRECISION} decreased to 0.${PRECISION}!\n"
+ RC=1
+ fi
+ if [ $RECALL -lt $BASE_RECALL ]; then
+ printf "ERROR: Your modifications decreased DGA classifier recall: 0.${BASE_RECALL} decreased to 0.${RECALL}!\n"
+ RC=1
+ fi
+
+ # Finally we print the current performances, upgrade BASE_ metrics in case you improved it.
+ echo "DGA detection performances report:"
+ echo "Accuracy=0.$ACCURACY"
+ echo "Precision=0.$PRECISION"
+ echo "Recall=0.$RECALL"
+}
+
+get_evaluation_data_size
+evaluate_ndpi_dga_detection
+# RC is 0 on success and 1 if the evaluation flagged a problem.
+exit "${RC}"