Skip to content

Commit

Permalink
Remove requirement that sample file name and id match as the core wil…
Browse files Browse the repository at this point in the history
…l not follow this, time to sort out the issue with RSEM having it in the top of the file, add check utility to verify all the files in a manifest exist
  • Loading branch information
rcurrie committed Jan 15, 2017
1 parent 3bec67e commit cbcc4cb
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ts := $(shell /bin/date "+%Y%m%d-%H%M%S")

AZURE_FLAVOR=Standard_D1
OPENSTACK_FLAVOR=m1.small
AZURE_FLAVOR=Standard_D14
OPENSTACK_FLAVOR=z1.medium

create-openstack:
# Start an openstack docker-machine, specify size by make create-openstack OPENSTACK_FLAVOR=m1.small
Expand Down
24 changes: 19 additions & 5 deletions fabfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def log_error(message):
env.hosts.index(env.host),
int(limit) if limit else None, len(env.hosts)):
sample_id = sample["Submitter Sample ID"]
sample_files = sample["File Path"].split(",")
sample_files = map(str.strip, sample["File Path"].split(","))
print "{} processing {}".format(env.host, sample_id)

if os.path.exists("{}/{}".format(outputs, sample_id)):
Expand All @@ -180,15 +180,15 @@ def log_error(message):
# See if all the files exist
for sample in sample_files:
if not os.path.isfile(sample):
log_error("{} does not exist".format(sample))
log_error("{} for {} does not exist".format(sample, sample_id))
continue

# Hack to make sure sample name and file name match because RNASeq
# puts the file name as the gene_id in the RSEM file and MedBook
# uses that to name the sample.
if rnaseq == "True" and not os.path.basename(sample).startswith(sample_id):
log_error("Filename does not match sample id: {} {}".format(sample_id, sample))
continue
# if rnaseq == "True" and not os.path.basename(sample).startswith(sample_id):
# log_error("Filename does not match sample id: {} {}".format(sample_id, sample))
# continue

print "Resetting {}".format(env.host)
reset_machine()
Expand Down Expand Up @@ -260,6 +260,20 @@ def log_error(message):
f.write(json.dumps(methods, indent=4))


@runs_once
def check(manifest):
    """
    Check that each file in manifest exists

    Reads manifest as a tab-delimited file with a header row. For every row
    the comma-separated "File Path" column is split into individual paths
    (surrounding whitespace stripped), and one line is printed for each path
    that is not an existing regular file, tagged with that row's
    "Submitter Sample ID".
    """
    # "rU" (universal newlines) is kept from the original call; the with
    # block closes the manifest once every row has been read.
    with open(manifest, "rU") as manifest_file:
        for row in csv.DictReader(manifest_file, delimiter="\t"):
            sample_id = row["Submitter Sample ID"]
            # A distinct name for each path avoids rebinding the row variable
            for path in map(str.strip, row["File Path"].split(",")):
                if not os.path.isfile(path):
                    print("{} for {} does not exist".format(path, sample_id))


def verify():
# Verify md5 of rnaseq output from TEST samples
with cd("/mnt/data/outputs"):
Expand Down

0 comments on commit cbcc4cb

Please sign in to comment.