#!/bin/bash -eu
#
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Set up network, Compute Engine VM, storage, and dataset for the sc17 project.
#
# To ensure correct setup, run this script in the default cloud shell
# in the GCP web console (the ">_" icon at the top right).
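#
# Example invocation (the project ID below is hypothetical; the GPU type and
# regions are optional and default to None, us-east1, and us-central1):
#   bash setup_step_1_cloud_project.sh my-sc17-project K80 us-east1 us-central1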
err() {
echo "ERROR [$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@" >&2
exit 1
}
# Check argument length
if [ "$#" -lt 1 ]; then
echo "Usage: $0 project-name [gpu-type] [compute-region] [dataflow-region]"
echo " project-name is the project name you specified during setup"
echo " gpu-type: either K80, P100, or None"
echo " (default: None)"
echo " compute-region: where you will be running data science notebooks"
echo " (default: us-east1)"
echo " dataflow-region: where you will run dataflow preprocessing jobs"
echo " (default: us-central1)"
exit 1
fi
PROJECT=$1
# Parse optional arguments in the order they appear in the usage string
if [ "$#" -ge 2 ]; then
GPU_TYPE=$2
else
GPU_TYPE=None
fi
if [ "$#" -ge 3 ]; then
COMPUTE_REGION=$3
else
COMPUTE_REGION=us-east1
fi
if [ "$#" -ge 4 ]; then
DATAFLOW_REGION=$4
else
DATAFLOW_REGION=us-central1
fi

#### Check quotas ####
#######################################
# Check quota and throw error if quota value is below the lower bound.
#
# The purpose of enforcing these quota bounds is to ensure a good user
# experience, since without quota increases some jobs and notebooks may take
# very long to run.
#
# Globals:
# None
# Arguments:
# quota_json: the quota json to parse
# metric_name: the quota metric name
# lower_bound: lower bound on the quota value
# Returns:
# None
#######################################
check_quota() {
METRIC=$(echo "$1" \
| jq ".quotas | map(select(.metric ==\"$2\")) | .[0].limit")
if [ "$METRIC" -lt $3 ]; then
err "Quota $2 must be at least $3: value = $METRIC. Please increase quota!"
fi
}
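# For illustration only: the JSON produced by "gcloud --format json" is
# expected to contain a quotas array roughly like
#   {"quotas": [{"limit": 200.0, "metric": "CPUS", "usage": 8.0}, ...], ...}
# so the jq filter in check_quota selects the entry whose .metric matches and
# reads its .limit.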
GLOBAL_QUOTA=$(gcloud --format json compute project-info \
describe --project $PROJECT)
DATAFLOW_REGION_QUOTA=$(gcloud --format json compute regions \
describe $DATAFLOW_REGION)
# The quota requirements here may be lower than those recommended in the
# README; they are mainly meant to ensure a minimum level of performance.
echo "Checking quotas for $DATAFLOW_REGION..."
check_quota "$DATAFLOW_REGION_QUOTA" IN_USE_ADDRESSES 50
check_quota "$DATAFLOW_REGION_QUOTA" DISKS_TOTAL_GB 65536
check_quota "$DATAFLOW_REGION_QUOTA" CPUS 200
echo "Checking global quotas..."
check_quota "$GLOBAL_QUOTA" CPUS_ALL_REGIONS 200
# If a gpu type is requested, check that you have at least one available
# in the requested region.
if [ "$GPU_TYPE" != "None" ]; then
echo "Checking quotas for $COMPUTE_REGION..."
# Get cloud compute info for your region
COMPUTE_INFO=$(gcloud compute regions describe $COMPUTE_REGION) || exit 1
# Verify that you have gpus available to create a VM
GPUS_QUOTA=$(echo "$COMPUTE_INFO" \
| grep -B1 "metric: NVIDIA_${GPU_TYPE}" \
| grep limit \
| cut -d' ' -f3 \
| cut -d'.' -f1)
# Get gpus in use by parsing the output
GPUS_IN_USE=$(echo "$COMPUTE_INFO" \
| grep -A1 "metric: NVIDIA_${GPU_TYPE}" \
| grep usage \
| cut -d' ' -f4 \
| cut -d'.' -f1)
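# For illustration only: the default YAML output of "gcloud compute regions
# describe" is expected to list quota entries roughly like
#   - limit: 1.0
#     metric: NVIDIA_K80_GPUS
#     usage: 0.0
# so grep -B1/-A1 picks up the neighboring limit/usage lines and cut strips
# the trailing decimal.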
# Verify that you still have gpus available
if [ "$GPUS_QUOTA" -gt "$GPUS_IN_USE" ]; then
echo "GPUs are available for creating a new VM!"
else
err "Not enough GPUs available! Either increase quota or stop existing VMs
with GPUs. Quota: $GPUS_QUOTA, Avail: $GPUS_IN_USE"
fi
fi
# Get a default zone for the vm based on region and GPU availabilities
# See https://cloud.google.com/compute/docs/gpus/#introduction.
if [ "$GPU_TYPE" == "K80" ]; then
ACCELERATOR_ARG="--accelerator type=nvidia-tesla-k80,count=1"
case "$COMPUTE_REGION" in
"us-east1")
ZONE="us-east1-d"
;;
"us-west1")
ZONE="us-west1-b"
;;
"europe-west1")
ZONE="europe-west1-b"
;;
"asia-east1")
ZONE="asia-east1-b"
;;
*)
err "Zone does not contain any gpus of type $GPU_TYPE"
;;
esac
elif [ "$GPU_TYPE" == "P100" ]; then
ACCELERATOR_ARG="--accelerator type=nvidia-tesla-p100,count=1"
case "$COMPUTE_REGION" in
"us-east1")
ZONE="us-east1-c"
;;
"us-west1")
ZONE="us-west1-b"
;;
"europe-west1")
ZONE="europe-west1-d"
;;
"asia-east1")
ZONE="asia-east1-a"
;;
*)
err "Zone does not contain any gpus of type $GPU_TYPE"
;;
esac
elif [ "$GPU_TYPE" != "None" ]; then
err "Unknown GPU type $GPU_TYPE"
else
ACCELERATOR_ARG=""
# Default to the zone named "<region>-a", except for regions that do not
# have an "-a" zone.
case "$COMPUTE_REGION" in
"europe-west1")
ZONE="europe-west1-b"
;;
"us-east1")
ZONE="us-east1-b"
;;
*)
ZONE="$COMPUTE_REGION-a"
;;
esac
fi
echo "All quota requirements passed!"
echo "Using zone $ZONE for data science VM creation"

#### CLOUD PROJECT APIS ####
# Enable google apis if they aren't already enabled
ENABLED_SERVICES=$(gcloud services list --enabled)
check_and_enable_service() {
SERVICE_ENABLED=$(echo "$ENABLED_SERVICES" | grep "$1") || true
if [[ -z "$SERVICE_ENABLED" ]]; then
gcloud services enable $1 || err "could not enable service $1"
fi
}
echo "Checking and Enabling Google APIs..."
check_and_enable_service dataflow.googleapis.com
check_and_enable_service logging.googleapis.com
check_and_enable_service cloudresourcemanager.googleapis.com
echo "APIs enabled!"

#### CREATE VM FOR DATA SCIENCE ####
# Create a VM to use for supercomputing!
# Note that notebooks primarily run on a single CPU core, so we do not need
# many cores. TensorFlow will be configured to use a GPU, which significantly
# speeds up deep net training.
if [ "$GPU_TYPE" != "None" ]; then
STARTUP_SCRIPT='#!/bin/bash
echo "Checking for CUDA and installing."
# Check for CUDA and try to install.
if ! dpkg-query -W cuda-8-0; then
curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_8.0.61-1_amd64.deb
dpkg -i ./cuda-repo-ubuntu1604_8.0.61-1_amd64.deb
apt-get update
apt-get install cuda-8-0 -y
fi'
else
STARTUP_SCRIPT=""
fi
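# The startup script is passed to the VM as instance metadata and runs as root
# on first boot; on the Ubuntu image used here its output typically ends up in
# /var/log/syslog, which is useful for verifying that the CUDA install
# succeeded.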
COMPUTE_INSTANCE=$PROJECT-compute-instance
gcloud compute instances create $COMPUTE_INSTANCE \
--zone $ZONE \
--custom-cpu 2 \
--custom-memory 13 \
$ACCELERATOR_ARG \
--image-family ubuntu-1604-lts \
--image-project ubuntu-os-cloud \
--boot-disk-type pd-standard \
--boot-disk-size 256GB \
--scopes cloud-platform \
--network default \
--maintenance-policy TERMINATE \
--restart-on-failure \
--metadata startup-script="$STARTUP_SCRIPT" \
|| err "Error starting up compute instance"
echo "Compute instance created successfully! GPU=$GPU_TYPE"

#### SETUP VM ENVIRONMENT FOR DATA SCIENCE ####
# Create bashrc modification script
BUCKET=$PROJECT-bucket
echo "
#!/bin/bash
sed -i '/export PROJECT.*/d' ~/.bashrc
echo \"export PROJECT=$PROJECT\" >> ~/.bashrc
echo \"Added PROJECT=$PROJECT to .bashrc\"
sed -i '/export BUCKET.*/d' ~/.bashrc
echo \"export BUCKET=$BUCKET\" >> ~/.bashrc
echo \"Added BUCKET=$BUCKET to .bashrc\"
sed -i '/export DATAFLOW_REGION.*/d' ~/.bashrc
echo \"export DATAFLOW_REGION=$DATAFLOW_REGION\" >> ~/.bashrc
echo \"Added the following lines to .bashrc\"
tail -n 3 ~/.bashrc
" > env_vars.sh
retry=0
# Retry SSH-ing 5 times
until [ $retry -ge 5 ]
do
echo "SSH retries: $retry"
# Create a temporary firewall rule for ssh connections
# Ignore error if rule is already created.
gcloud compute firewall-rules create allow-ssh \
--allow tcp:22 \
--source-ranges 0.0.0.0/0 \
|| true
# SSH and execute script
gcloud compute config-ssh
gcloud compute ssh --zone $ZONE $COMPUTE_INSTANCE -- 'bash -s' < ~/env_vars.sh \
&& break
retry=$((retry + 1))
echo "SSH connection failed. Machine may still be booting up. Retrying in 30 seconds..."
sleep 30
done
if [ $retry -ge 5 ]; then
err "Unsuccessful setting up bashrc environment for $COMPUTE_INSTANCE"
fi

#### STORAGE ####
# Create a storage bucket for the demo if it doesn't exist.
echo "Creating a storage bucket and dataset..."
gsutil mb -p $PROJECT -c regional -l $COMPUTE_REGION gs://$BUCKET/ \
|| true
# Create a new dataset for storing tables in this project if it doesn't exist
# If this is your first time making a dataset, you may be prompted to select
# a default project.
bq mk dataset || true
echo "Done!"