diff --git a/Makefile b/Makefile index f40850f..47344cd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ ts := $(shell /bin/date "+%Y%m%d-%H%M%S") -AZURE_FLAVOR=Standard_D1 -OPENSTACK_FLAVOR=m1.small +AZURE_FLAVOR=Standard_D14 +OPENSTACK_FLAVOR=z1.medium create-openstack: # Start an openstack docker-machine, specify size by make create-openstack OPENSTACK_FLAVOR=m1.small diff --git a/fabfile.py b/fabfile.py index 7d7d2bc..2d0b97d 100644 --- a/fabfile.py +++ b/fabfile.py @@ -170,7 +170,7 @@ def log_error(message): env.hosts.index(env.host), int(limit) if limit else None, len(env.hosts)): sample_id = sample["Submitter Sample ID"] - sample_files = sample["File Path"].split(",") + sample_files = map(str.strip, sample["File Path"].split(",")) print "{} processing {}".format(env.host, sample_id) if os.path.exists("{}/{}".format(outputs, sample_id)): @@ -180,15 +180,15 @@ def log_error(message): # See if all the files exist for sample in sample_files: if not os.path.isfile(sample): - log_error("{} does not exist".format(sample)) + log_error("{} for {} does not exist".format(sample, sample_id)) continue # Hack to make sure sample name and file name match because RNASeq # puts the file name as the gene_id in the RSEM file and MedBook # uses that to name the sample. 
-            if rnaseq == "True" and not os.path.basename(sample).startswith(sample_id):
-                log_error("Filename does not match sample id: {} {}".format(sample_id, sample))
-                continue
+            # if rnaseq == "True" and not os.path.basename(sample).startswith(sample_id):
+            #     log_error("Filename does not match sample id: {} {}".format(sample_id, sample))
+            #     continue
 
         print "Resetting {}".format(env.host)
         reset_machine()
@@ -260,6 +260,28 @@ def log_error(message):
         f.write(json.dumps(methods, indent=4))
 
 
+@runs_once
+def check(manifest):
+    """
+    Check that each file listed in the manifest exists.
+
+    Reads the tab-delimited manifest and, for each row, splits the
+    comma-separated "File Path" column and prints a message for every
+    path that is not an existing file. Nothing is returned.
+    """
+    # Context manager closes the manifest handle instead of leaking it
+    with open(manifest, "rU") as manifest_file:
+        for sample in csv.DictReader(manifest_file, delimiter="\t"):
+            sample_id = sample["Submitter Sample ID"]
+            # Paths may be written as "a, b"; strip the padding around each
+            sample_files = [p.strip() for p in sample["File Path"].split(",")]
+
+            # Distinct loop name so the manifest row is not shadowed
+            for sample_file in sample_files:
+                if not os.path.isfile(sample_file):
+                    print("{} for {} does not exist".format(sample_file, sample_id))
+
+
 def verify():
     # Verify md5 of rnaseq output from TEST samples
     with cd("/mnt/data/outputs"):