-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtransform-variants.sh
executable file
·35 lines (30 loc) · 1.03 KB
/
transform-variants.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
#
# transform-variants.sh - stage a sample's VCF on HDFS, convert it to ADAM
# variants format, and copy the result to a destination location.
#
# Usage:
#   transform-variants.sh s3://source sample s3://dest
#
# Arguments:
#   $1  source location that contains <sample>.vcf.gz
#   $2  sample name (the file name without the .vcf.gz suffix)
#   $3  destination location that receives <sample>.variants.adam
#
# Steps:
#   1. create the staging directory on HDFS
#   2. copy <source>/<sample>.vcf.gz to HDFS with conductor (--concat)
#   3. run ADAM transformVariants on the staged file
#   4. copy the resulting <sample>.variants.adam to <dest> with conductor
#
# Requires hadoop, spark-submit and adam-submit on PATH. The conductor jar is
# referenced by a relative path, so the script must be started from the
# directory that contains conductor-0.5-SNAPSHOT/.
#
# Exit status: 2 on bad usage; otherwise the status of the first failing step,
# or 0 when every step succeeds.

# Abort on the first failing step: a failed download must never be converted,
# and a failed conversion must never be uploaded.
set -euo pipefail

# Extra trailing arguments are tolerated (and ignored) as before; missing or
# empty required ones are rejected up front.
if [[ $# -lt 3 || -z "$1" || -z "$2" || -z "$3" ]]; then
  printf 'usage: %s s3://source sample s3://dest\n' "${0##*/}" >&2
  exit 2
fi

readonly SRC_DIR=$1
readonly SAMPLE=$2
readonly DEST_DIR=$3

# Spark memory settings handed to adam-submit.
readonly DRIVER_MEMORY="58G"
readonly EXECUTOR_MEMORY="58G"

# Staging area: HDFS_DIR is the path as seen by `hadoop fs`, HDFS_PATH is the
# same directory as a fully qualified URI for the Spark jobs.
readonly HDFS_DIR="/data"
readonly HDFS_PATH="hdfs://spark-master:8020$HDFS_DIR"

# Relative to the current working directory.
readonly CONDUCTOR_JAR="conductor-0.5-SNAPSHOT/conductor-0.5-SNAPSHOT-distribution.jar"

# Staged input and output on HDFS.
readonly STAGED_VCF="$HDFS_PATH/$SAMPLE.vcf.gz"
readonly STAGED_ADAM="$HDFS_PATH/$SAMPLE.variants.adam"

echo "creating $HDFS_DIR directory on hdfs..."
hadoop fs -mkdir -p "$HDFS_DIR"

echo "downloading $SRC_DIR/$SAMPLE.vcf.gz to $STAGED_VCF with conductor..."
spark-submit \
  "$CONDUCTOR_JAR" \
  "$SRC_DIR/$SAMPLE.vcf.gz" \
  "$STAGED_VCF" \
  --concat

echo "converting $STAGED_VCF to $STAGED_ADAM..."
# Options before the bare `--` are Spark options; everything after it is the
# ADAM command and its arguments.
adam-submit \
  --driver-memory "$DRIVER_MEMORY" \
  --executor-memory "$EXECUTOR_MEMORY" \
  -- \
  transformVariants \
  "$STAGED_VCF" \
  "$STAGED_ADAM"

echo "uploading $STAGED_ADAM to $DEST_DIR with conductor..."
spark-submit \
  "$CONDUCTOR_JAR" \
  "$STAGED_ADAM" \
  "$DEST_DIR/$SAMPLE.variants.adam"