Skip to content

Commit

Permalink
Add checks for *.pb datasets (open-reaction-database#27)
Browse files Browse the repository at this point in the history
* Add tests for *.pb datasets

* Move event trigger

* Remove quotes?

* Migrate to environment files
  • Loading branch information
skearnes authored Oct 22, 2020
1 parent 92c9b53 commit 12326bd
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 6 deletions.
15 changes: 12 additions & 3 deletions .github/workflows/submission.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
run: |
cd "${GITHUB_WORKSPACE}"
git diff --name-only upstream/main > changed_files.txt
grep -vE '\.pbtxt|\.pb$|\.svg$' changed_files.txt && \
grep -vE '\.pbtxt$|\.pb$|\.svg$' changed_files.txt && \
echo "Error: submission contains unusual file types" && \
exit 1 || true
if: >-
Expand Down Expand Up @@ -124,10 +124,11 @@ jobs:
echo "Found $(wc -l < changed_files.txt | tr -d ' ') changed files"
cat changed_files.txt
# grep exits with status 1 when no lines match; `|| (( $? == 1 ))` accepts that
# case while still failing the step on a real grep error (status 2 or higher).
grep -e "\.pbtxt$" changed_files.txt > changed_pbtxt_files.txt || (( $? == 1 ))
grep -E "\.pbtxt$" changed_files.txt > changed_pbtxt_files.txt || (( $? == 1 ))
grep -E "\.pb$" changed_files.txt > changed_pb_files.txt || (( $? == 1 ))
# Use LOCAL_NUM_CHANGED since values set via ::set-env or written to $GITHUB_ENV
# only become visible in later steps, not in the step that sets them.
LOCAL_NUM_CHANGED="$(wc -l < changed_pbtxt_files.txt | tr -d ' ')"
echo "::set-env name=NUM_CHANGED_FILES::${LOCAL_NUM_CHANGED}"
echo "NUM_CHANGED_FILES=${LOCAL_NUM_CHANGED}" >> $GITHUB_ENV
echo "Found ${LOCAL_NUM_CHANGED} changed pbtxt files"
cat changed_pbtxt_files.txt
- name: Install miniconda
Expand All @@ -148,6 +149,14 @@ jobs:
python ./ord-schema/ord_schema/process_dataset.py \
--input_file=changed_pbtxt_files.txt \
--base=upstream/main
python ./ord-schema/ord_schema/process_dataset.py \
--input_file=changed_pb_files.txt
# Pass each changed *.pb file to check_pb.py together with the path formed by
# appending "txt" to it (foo.pb -> foo.pbtxt).
#
# The list is read line by line. Iterating over a quoted "$(cat ...)" yields a
# single word, so the loop would run exactly once: with all paths joined by
# newlines when several files changed, or with an empty name when none did.
# The list is opened on fd 3 so the loop body cannot consume it through stdin.
while IFS= read -r filename <&3 || [[ -n "${filename}" ]]; do
  # Skip blank lines (e.g. an empty list file or a stray trailing newline).
  [[ -n "${filename}" ]] || continue
  echo "${filename}"
  python ./ord-schema/ord_schema/scripts/check_pb.py \
    --pb="${filename}" \
    --pbtxt="${filename}txt"
done 3< changed_pb_files.txt
if: env.NUM_CHANGED_FILES != '0'
- name: Update submission
run: |
Expand Down
24 changes: 21 additions & 3 deletions .github/workflows/validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ on:
branches:
- main
schedule:
# Runs every eight hours.
- cron: '0 */8 * * *'
# Runs every night at midnight.
- cron: '0 0 * * *'
pull_request:
paths:
# Runs when this file is modified in a PR.
- '.github/workflows/validation.yml'

jobs:
validate_database:
Expand All @@ -31,8 +35,22 @@ jobs:
pip install -r requirements.txt
conda install -c rdkit rdkit
python setup.py install
- name: Validate
- name: Validate *.pbtxt datasets
run: |
cd "${GITHUB_WORKSPACE}"
python ./ord-schema/ord_schema/process_dataset.py \
--input_pattern="data/*/*.pbtxt"
- name: Validate *.pb datasets
run: |
cd "${GITHUB_WORKSPACE}"
python ./ord-schema/ord_schema/process_dataset.py \
--input_pattern="data/*/*.pb"
- name: Check pbtxt <-> pb encoding
run: |
cd "${GITHUB_WORKSPACE}"
# Run check_pb.py once per file matching data/*/*.pb, echoing each path first.
# The --pbtxt argument is the same path with "txt" appended (foo.pb -> foo.pbtxt).
# NOTE(review): if the glob matches nothing, bash passes the literal pattern
# "data/*/*.pb" to the script (nullglob is not set here) — confirm that a
# failure in that case is the intended outcome.
for filename in data/*/*.pb; do
echo "${filename}"
python ./ord-schema/ord_schema/scripts/check_pb.py \
--pb="${filename}" \
--pbtxt="${filename}txt"
done

0 comments on commit 12326bd

Please sign in to comment.