Skip to content

Commit

Permalink
Store perf numbers in S3 (pytorch#5951)
Browse files Browse the repository at this point in the history
* Store perf numbers in S3

Previously the perf numbers were stored in https://github.com/yf225/perf-tests/tree/cpu, but we couldn't figure out a way to push the perf numbers only from master builds. This PR moves the perf number storage to S3, which gives us finer control over when to push the new numbers.

This replaces pytorch#5844 - storing numbers in RDS has its own problems with schema migration and backward compatibility, and using a NoSQL database might be overkill at this point.

* Fixed issues
  • Loading branch information
yf225 authored and ezyang committed Mar 24, 2018
1 parent 332d5ff commit 2f8d658
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 42 deletions.
14 changes: 10 additions & 4 deletions .jenkins/perf_test/compare_with_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,18 @@
elif 'gpu' in test_name:
backend = 'gpu'

data_file_path = '../perf_test_numbers_{}.json'.format(backend)
data_file_path = '../{}_runtime.json'.format(backend)

with open(data_file_path) as data_file:
data = json.load(data_file)

mean = float(data[test_name]['mean'])
sigma = float(data[test_name]['sigma'])
if test_name in data:
mean = float(data[test_name]['mean'])
sigma = float(data[test_name]['sigma'])
else:
# Let the test pass if baseline number doesn't exist
mean = sys.maxsize
sigma = 0.001

print("population mean: ", mean)
print("population sigma: ", sigma)
Expand All @@ -51,9 +56,10 @@
print("z-value < 2, no perf regression detected.")
if args.update:
print("We will use these numbers as new baseline.")
new_data_file_path = '../new_perf_test_numbers_{}.json'.format(backend)
new_data_file_path = '../new_{}_runtime.json'.format(backend)
with open(new_data_file_path) as new_data_file:
new_data = json.load(new_data_file)
new_data[test_name] = {}
new_data[test_name]['mean'] = sample_mean
new_data[test_name]['sigma'] = max(sample_sigma, sample_mean * 0.01)
with open(new_data_file_path, 'w') as new_data_file:
Expand Down
46 changes: 27 additions & 19 deletions .jenkins/short-perf-test-cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,49 @@ cd .jenkins/perf_test

echo "Running CPU perf test for PyTorch..."

# Get last master commit hash
export PYTORCH_COMMIT_ID=$(git log --format="%H" -n 1)
pip install awscli

# Get baseline file from https://github.com/yf225/perf-tests
if [ -f /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json ]; then
cp /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json perf_test_numbers_cpu.json
else
curl https://raw.githubusercontent.com/yf225/perf-tests/master/perf_test_numbers_cpu.json -O
# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
# More info at https://github.com/aws/aws-cli/issues/2321
aws configure set default.s3.multipart_threshold 5GB

if [[ "$COMMIT_SOURCE" == master ]]; then
# Get current master commit hash
export MASTER_COMMIT_ID=$(git log --format="%H" -n 1)
fi

if [[ "$GIT_COMMIT" == *origin/master* ]]; then
# Find the master commit to test against
IFS=$'\n'
master_commit_ids=($(git rev-list HEAD))
for commit_id in "${master_commit_ids[@]}"; do
if aws s3 ls s3://ossci-perf-test/pytorch/cpu_runtime/${commit_id}.json; then
LATEST_TESTED_COMMIT=${commit_id}
break
fi
done
aws s3 cp s3://ossci-perf-test/pytorch/cpu_runtime/${LATEST_TESTED_COMMIT}.json cpu_runtime.json

if [[ "$COMMIT_SOURCE" == master ]]; then
# Prepare new baseline file
cp perf_test_numbers_cpu.json new_perf_test_numbers_cpu.json
python update_commit_hash.py new_perf_test_numbers_cpu.json ${PYTORCH_COMMIT_ID}
cp cpu_runtime.json new_cpu_runtime.json
python update_commit_hash.py new_cpu_runtime.json ${MASTER_COMMIT_ID}
fi

# Include tests
. ./test_cpu_speed_mini_sequence_labeler.sh
. ./test_cpu_speed_mnist.sh

# Run tests
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
if [[ "$COMMIT_SOURCE" == master ]]; then
run_test test_cpu_speed_mini_sequence_labeler 20 compare_and_update
run_test test_cpu_speed_mnist 20 compare_and_update
else
run_test test_cpu_speed_mini_sequence_labeler 20 compare_with_baseline
run_test test_cpu_speed_mnist 20 compare_with_baseline
fi

if [[ "$GIT_COMMIT" == *origin/master* ]]; then
# Push new baseline file
cp new_perf_test_numbers_cpu.json /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json
cd /var/lib/jenkins/host-workspace
git config --global user.email [email protected]
git config --global user.name Jenkins
git add perf_test_numbers_cpu.json
git commit -m "New CPU perf test baseline from ${PYTORCH_COMMIT_ID}"
if [[ "$COMMIT_SOURCE" == master ]]; then
# This could cause a race condition if we are testing the same master commit twice,
# but the chance of both runs executing this line at the same time is low.
aws s3 cp new_cpu_runtime.json s3://ossci-perf-test/pytorch/cpu_runtime/${MASTER_COMMIT_ID}.json --acl public-read
fi
46 changes: 27 additions & 19 deletions .jenkins/short-perf-test-gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,32 @@ cd .jenkins/perf_test

echo "Running GPU perf test for PyTorch..."

# Get last master commit hash
export PYTORCH_COMMIT_ID=$(git log --format="%H" -n 1)
pip install awscli

# Get baseline file from https://github.com/yf225/perf-tests
if [ -f /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json ]; then
cp /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json perf_test_numbers_gpu.json
else
curl https://raw.githubusercontent.com/yf225/perf-tests/master/perf_test_numbers_gpu.json -O
# Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read
# More info at https://github.com/aws/aws-cli/issues/2321
aws configure set default.s3.multipart_threshold 5GB

if [[ "$COMMIT_SOURCE" == master ]]; then
# Get current master commit hash
export MASTER_COMMIT_ID=$(git log --format="%H" -n 1)
fi

if [[ "$GIT_COMMIT" == *origin/master* ]]; then
# Find the master commit to test against
IFS=$'\n'
master_commit_ids=($(git rev-list HEAD))
for commit_id in "${master_commit_ids[@]}"; do
if aws s3 ls s3://ossci-perf-test/pytorch/gpu_runtime/${commit_id}.json; then
LATEST_TESTED_COMMIT=${commit_id}
break
fi
done
aws s3 cp s3://ossci-perf-test/pytorch/gpu_runtime/${LATEST_TESTED_COMMIT}.json gpu_runtime.json

if [[ "$COMMIT_SOURCE" == master ]]; then
# Prepare new baseline file
cp perf_test_numbers_gpu.json new_perf_test_numbers_gpu.json
python update_commit_hash.py new_perf_test_numbers_gpu.json ${PYTORCH_COMMIT_ID}
cp gpu_runtime.json new_gpu_runtime.json
python update_commit_hash.py new_gpu_runtime.json ${MASTER_COMMIT_ID}
fi

# Include tests
Expand All @@ -31,7 +43,7 @@ fi
. ./test_gpu_speed_mlstm.sh

# Run tests
if [[ "$GIT_COMMIT" == *origin/master* ]]; then
if [[ "$COMMIT_SOURCE" == master ]]; then
run_test test_gpu_speed_mnist 20 compare_and_update
run_test test_gpu_speed_word_language_model 20 compare_and_update
run_test test_gpu_speed_cudnn_lstm 20 compare_and_update
Expand All @@ -45,12 +57,8 @@ else
run_test test_gpu_speed_mlstm 20 compare_with_baseline
fi

if [[ "$GIT_COMMIT" == *origin/master* ]]; then
# Push new baseline file
cp new_perf_test_numbers_gpu.json /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json
cd /var/lib/jenkins/host-workspace
git config --global user.email [email protected]
git config --global user.name Jenkins
git add perf_test_numbers_gpu.json
git commit -m "New GPU perf test baseline from ${PYTORCH_COMMIT_ID}"
if [[ "$COMMIT_SOURCE" == master ]]; then
# This could cause a race condition if we are testing the same master commit twice,
# but the chance of both runs executing this line at the same time is low.
aws s3 cp new_gpu_runtime.json s3://ossci-perf-test/pytorch/gpu_runtime/${MASTER_COMMIT_ID}.json --acl public-read
fi

0 comments on commit 2f8d658

Please sign in to comment.