Skip to content

Commit

Permalink
Force-include old titer reference strains
Browse files Browse the repository at this point in the history
Adds a new column to the metadata indicating whether a strain is a titer
reference strain or not and updates the subsampling logic for private
builds to force-include titer reference strains as far back in time as
other reference strains. This change fixes an issue where the 2-year
limit on titer strains would cause reference strains to be dropped from
builds and cause their titer measurements to also be dropped even if
those measurements were against more recent test viruses. Historically,
we manually maintained a list of reference strains per subtype to make
sure these kinds of older titer references were force-included.
Maintaining this list isn't sustainable or necessary, since we can
automate the process like we do in this commit.
  • Loading branch information
huddlej committed Jan 31, 2025
1 parent 605772b commit d216823
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 8 deletions.
4 changes: 4 additions & 0 deletions profiles/nextflu-private.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ builds:
# We allow egg-passage strains when they have titer measurements,
# enabling titer models to be fit to egg-passaged data.
filters: --query "(is_titer_strain == True)" --min-date {min_date} --exclude {exclude}
titer_reference_strains:
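# We force-include titer reference strains as far back in time as other
# reference strains (note {reference_min_date} instead of {min_date}), so
# titer measurements against these references are not dropped from the build.
# As with titer strains, we allow egg-passaged strains here, enabling titer
# models to be fit to egg-passaged data.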
filters: --query "(is_titer_reference_strain == True)" --min-date {reference_min_date} --exclude {exclude}
h3n2_2y_titers:
lineage: "h3n2"
reference: "config/h3n2/{segment}/reference.fasta"
Expand Down
37 changes: 29 additions & 8 deletions workflow/snakemake_rules/select_strains.smk
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ rule concat_titers_for_build:
"logs/concat_titers_for_build_{build_name}.txt"
shell:
"""
tsv-append -H {input.titers} | tsv-select -H -f virus_strain > {output.titers} 2> {log}
tsv-append -H {input.titers} | tsv-select -H -f virus_strain,serum_strain > {output.titers} 2> {log}
"""

rule titer_priorities:
Expand Down Expand Up @@ -190,12 +190,37 @@ rule build_titer_strains_table:
--expression "'True'" > {output.titer_strains}
"""

# Build a one-row-per-strain table of titer reference strains for a build.
# Takes the `serum_strain` column of the build's concatenated titers, renames
# it to `strain`, de-duplicates it, and adds a constant column
# `is_titer_reference_strain` with the value 'True'. The resulting table has a
# `strain` column, so it can be joined onto the metadata by strain name.
rule build_titer_reference_strains_table:
input:
titers="builds/{build_name}/all_titers.tsv",
output:
# NOTE(review): this output key is named `titer_strains` even though the file
# holds titer *reference* strains; the key name is only used by the shell
# command below.
titer_strains=build_dir + "/{build_name}/titer_reference_strains.tsv",
conda: "../envs/nextstrain.yaml"
benchmark:
"benchmarks/build_titer_reference_strains_table_{build_name}.txt"
# NOTE(review): a log path is declared, but the shell command below never
# redirects anything to {log} -- confirm whether stderr should be captured.
log:
"logs/build_titer_reference_strains_table_{build_name}.txt"
# NOTE(review): only the first csvtk call passes --tabs and only the last one
# passes --out-tabs. The intermediate stream is a single column, so this
# presumably only matters if a strain name contains a comma -- TODO confirm.
shell:
"""
csvtk --tabs cut \
--fields serum_strain \
{input.titers} \
| csvtk rename \
--fields serum_strain \
--names strain \
| csvtk uniq \
| csvtk --out-tabs mutate2 \
--name is_titer_reference_strain \
--expression "'True'" > {output.titer_strains}
"""

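# Annotate strains in the metadata based on whether they have titer data
# (`is_titer_strain`) and whether they are titer reference strains
# (`is_titer_reference_strain`), so we can include these strains by attribute
# from augur filter later. Strains missing from either table get "False".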
rule annotate_metadata_with_titer_strains:
input:
metadata=lambda wildcards: f"data/{config['builds'][wildcards.build_name]['lineage']}/metadata.tsv",
references=build_dir + "/{build_name}/titer_strains.tsv",
titer_strains=build_dir + "/{build_name}/titer_strains.tsv",
titer_reference_strains=build_dir + "/{build_name}/titer_reference_strains.tsv",
output:
metadata=build_dir + "/{build_name}/full_metadata_with_titer_annotations.tsv",
conda: "../envs/nextstrain.yaml"
Expand All @@ -205,12 +230,8 @@ rule annotate_metadata_with_titer_strains:
"logs/annotate_metadata_with_titer_strains_{build_name}.txt"
shell:
"""
csvtk --tabs join \
--left-join \
--na "False" \
-f "strain" \
{input.metadata} \
{input.references} > {output.metadata}
csvtk --tabs join --left-join --na "False" -f "strain" {input.metadata} {input.titer_strains} \
| csvtk --tabs join --left-join --na "False" -f "strain" /dev/stdin {input.titer_reference_strains} > {output.metadata}
"""

def get_metadata_for_subsampling(wildcards):
Expand Down

0 comments on commit d216823

Please sign in to comment.