forked from Unstructured-IO/unstructured
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck-diff-evaluation-metrics.sh
executable file
·74 lines (66 loc) · 2.65 KB
/
check-diff-evaluation-metrics.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env bash
# Description: Compare the current evaluation metrics to the previous evaluation metrics and exit
# with an error if they are different. If the environment variable OVERWRITE_FIXTURES
# is not "false", then this script will instead copy the output files to the expected
# output directory.
#
# Environment Variables:
# - OVERWRITE_FIXTURES: Controls whether to overwrite fixtures or not. default: "false"
# Keep errexit off: later steps rely on inspecting non-zero statuses (e.g. from diff)
# instead of aborting on them.
set +e

# First positional argument: name of the evaluation whose metrics are handled
# (the path comments below mention text-extraction and element-type as examples).
EVAL_NAME="${1}"

# Environment overrides; each falls back to its default when unset or empty.
: "${OVERWRITE_FIXTURES:=false}"
: "${TMP_DIRECTORY_CLEANUP:=true}"

# Directory holding this script; also the default root for the metrics folders.
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
: "${OUTPUT_ROOT:=${SCRIPT_DIR}}"

# Checked-in metrics, e.g. test_unstructured_ingest/metrics/text-extraction
# or test_unstructured_ingest/metrics/element-type
METRICS_DIR="${OUTPUT_ROOT}/metrics/${EVAL_NAME}"
# Metrics from the latest run, e.g. test_unstructured_ingest/metrics-tmp/text-extraction
# or test_unstructured_ingest/metrics-tmp/element-type
TMP_METRICS_LATEST_RUN_DIR="${OUTPUT_ROOT}/metrics-tmp/${EVAL_NAME}"

# Load the shared helpers next to this script (cleanup_dir is called further down).
# shellcheck disable=SC1091
. "${SCRIPT_DIR}/cleanup.sh"
# EXIT handler: hands the latest-run metrics directory to cleanup_dir when
# TMP_DIRECTORY_CLEANUP is exactly "true"; any other value only prints a notice.
# Globals: TMP_DIRECTORY_CLEANUP (read), TMP_METRICS_LATEST_RUN_DIR (read)
cleanup() {
  case "$TMP_DIRECTORY_CLEANUP" in
    true)
      cleanup_dir "$TMP_METRICS_LATEST_RUN_DIR"
      ;;
    *)
      echo "skipping tmp directory cleanup"
      ;;
  esac
}
# Run the handler on every exit path of the script.
trap cleanup EXIT
# Ends the whole script with status 0 when the latest-run metrics directory is
# missing: without evaluation output there is nothing to act on.
# Returns normally (status 0) when the directory exists.
# Globals: TMP_METRICS_LATEST_RUN_DIR (read)
check_output_folder() {
  [[ -d "$TMP_METRICS_LATEST_RUN_DIR" ]] || exit 0
}
# Main flow: either overwrite the checked-in metrics with the latest run
# (OVERWRITE_FIXTURES != "false") or compare the two and exit 1 on any difference.
# To update ingest test fixtures, run scripts/ingest-test-fixtures-update.sh on x86_64.
#
# Exit early (status 0) when the latest run produced no output directory. This must
# happen before either branch: the overwrite branch would otherwise delete the existing
# fixtures with nothing to replace them, and the compare branch would run diff against
# a missing directory only to exit 0 afterwards anyway.
check_output_folder
if [ "$OVERWRITE_FIXTURES" != "false" ]; then
  # Remove the old fixture folder if it exists, so the copy starts from an empty one.
  if [ -d "$METRICS_DIR" ]; then
    rm -rf "$METRICS_DIR"
  fi
  # mkdir -p also creates "$OUTPUT_ROOT/metrics" itself when it is missing.
  mkdir -p "$METRICS_DIR"
  # Force copy (overwrite) files from metrics-tmp (new eval metrics) to metrics
  # (old eval metrics). The source directory is copied *into* "$OUTPUT_ROOT/metrics",
  # so it lands under its own base name.
  cp -rf "$TMP_METRICS_LATEST_RUN_DIR" "$OUTPUT_ROOT/metrics"
elif ! diff -ru "$METRICS_DIR" "$TMP_METRICS_LATEST_RUN_DIR"; then
  # NOTE(review): presumably strips permission-related files from the latest run before
  # the report is written -- confirm against clean-permissions-files.sh.
  "$SCRIPT_DIR"/clean-permissions-files.sh "$TMP_METRICS_LATEST_RUN_DIR"
  # Save the diff (taken after the helper above ran) to the current working directory
  # and print a diffstat summary of it.
  diff -ru "$METRICS_DIR" "$TMP_METRICS_LATEST_RUN_DIR" >metricsdiff.txt
  diffstat -c metricsdiff.txt
  echo
  echo "There are differences from the previously checked-in structured outputs."
  echo
  echo "If these differences are acceptable, overwrite by the fixtures by setting the env var:"
  echo
  echo " export OVERWRITE_FIXTURES=true"
  echo
  echo "and then rerun this script."
  echo
  echo "NOTE: You'll likely just want to run scripts/ingest-test-fixtures-update.sh on x86_64 hardware"
  echo "to update fixtures for CI."
  echo
  exit 1
fi