First step of cleaning up the Makefile.in hell. [first-step-to-automake-integration]

The goal is to get rid of the Makefile.in files (replacing them with Automake Makefile.am files), as they duplicate lots of text. That decreases readability and is in general a bad design pattern. It seems appropriate to use Automake for an Autoconf-based project. Currently achieved: * using libtool to build the core library (+ libtool's semantic versioning) * out-of-source builds should work right now * introducing an Automake-based Makefile in src/ * removed some (maybe) unused Git-ignored files * provide some small Python fixes Signed-off-by: Toni Uhlig <matzeton@googlemail.com>
author: Toni Uhlig <matzeton@googlemail.com> 2021-04-04 21:59:14 +0200
committer: Toni Uhlig <matzeton@googlemail.com> 2021-07-30 22:09:21 +0200
commit: b503900b1456e8bd4b60d1deb0ef7cc3665676f1 (patch)
tree: 92e7a88c190caf23f810e0a828b6da926f11879a /tests/do-dga.sh.in
parent: 13c5d6801e3b9fdc71bec88c300243a939346a8d (diff)
1 files changed, 65 insertions, 0 deletions
diff --git a/tests/do-dga.sh.in b/tests/do-dga.sh.in
new file mode 100755
index 000000000..4c5bd8766
--- /dev/null
+++ b/tests/do-dga.sh.in
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+cd "$(dirname "${0}")" || exit 1  # DGA_EVALUATE below is relative to this script's directory
+
+# Baseline performances ------------------------------------------------------------------------------------------------
+# Important note: BASE values must be integers and represent percentages (e.g. 79 for 79%, 98 for 98%).
+BASE_ACCURACY=69
+BASE_PRECISION=89
+BASE_RECALL=41
+# ----------------------------------------------------------------------------------------------------------------------
+
+DGA_EVALUATE="dga/dga_evaluate"                # evaluator program; its output is used as a count
+DGA_DATA="@srcdir@/dga/test_dga.csv"           # @srcdir@ is substituted when this .in template is processed
+NON_DGA_DATA="@srcdir@/dga/test_non_dga.csv"
+DGA_DATA_SIZE=0      # line count of DGA_DATA, set by get_evaluation_data_size
+NON_DGA_DATA_SIZE=0  # line count of NON_DGA_DATA, set by get_evaluation_data_size
+DATA_SIZE=0          # sum of the two line counts
+RC=0                 # exit status of the script; set to 1 when a metric is below its baseline
+
+get_evaluation_data_size() {  # Sets DGA_DATA_SIZE and NON_DGA_DATA_SIZE (line counts) and DATA_SIZE (their sum).
+  DGA_DATA_SIZE=$(wc -l < "${DGA_DATA}" | awk '{ print $1 }')  # quoted: the path may contain whitespace
+  NON_DGA_DATA_SIZE=$(wc -l < "${NON_DGA_DATA}" | awk '{ print $1 }')  # awk drops padding, in case wc adds any
+  DATA_SIZE=$(( NON_DGA_DATA_SIZE + DGA_DATA_SIZE ))
+}
+
+evaluate_ndpi_dga_detection() {
+  # DGA detection is a binary classification problem, We evaluate the following metrics:
+  # Accuracy: (TP + TN) / (TP + TN + FN + FP)
+  # Precision: TP / (TP + FP)
+  # Recall: TP / (TP + FN)
+
+  TP=`$DGA_EVALUATE ${DGA_DATA}`
+  FN=$(( $DGA_DATA_SIZE - $TP ))
+  FP=`$DGA_EVALUATE ${NON_DGA_DATA}`
+  TN=$(( $NON_DGA_DATA_SIZE - $FP ))
+
+  ACCURACY=`echo "print(int(((${TP} + ${TN})/(${TP} + ${TN} + ${FP} + ${FN}))*100))" | python3`
+  PRECISION=`echo "print(int(((${TP})/(${TP} + ${FP}))*100))" | python3`
+  RECALL=`echo "print(int(((${TP})/(${TP} + ${FN}))*100))" | python3`
+
+  # In case modified version of classification algorithm decreases performances, test do not pass.
+  if [ $ACCURACY -lt $BASE_ACCURACY ]; then
+		 printf "ERROR: Your modifications decreased DGA classifier accuracy: 0.${BASE_ACCURACY} decreased to 0.${ACCURACY}!\n"
+		 RC=1
+  fi
+  if [ $PRECISION -lt $BASE_PRECISION ]; then
+		 printf "ERROR: Your modifications decreased DGA classifier precision: 0.${BASE_PRECISION} decreased to 0.${PRECISION}!\n"
+		 RC=1
+  fi
+  if [ $RECALL -lt $BASE_RECALL ]; then
+		 printf "ERROR: Your modifications decreased DGA classifier recall: 0.${BASE_RECALL} decreased to 0.${RECALL}!\n"
+		 RC=1
+  fi
+
+  # Finally we print the current performances, upgrade BASE_ metrics in case you improved it.
+  echo "DGA detection performances report:"
+  echo "Accuracy=0.$ACCURACY"
+  echo "Precision=0.$PRECISION"
+  echo "Recall=0.$RECALL"
+}
+
+get_evaluation_data_size      # sets DGA_DATA_SIZE, NON_DGA_DATA_SIZE and DATA_SIZE
+evaluate_ndpi_dga_detection   # prints the report; sets RC to 1 when a metric is below its baseline
+
+exit "${RC}"
author	Toni Uhlig <matzeton@googlemail.com>	2021-04-04 21:59:14 +0200
committer	Toni Uhlig <matzeton@googlemail.com>	2021-07-30 22:09:21 +0200
commit	b503900b1456e8bd4b60d1deb0ef7cc3665676f1 (patch)
tree	92e7a88c190caf23f810e0a828b6da926f11879a /tests/do-dga.sh.in
parent	13c5d6801e3b9fdc71bec88c300243a939346a8d (diff)