
Commit

YU-Zhejian committed Apr 8, 2024
2 parents 6cc27c9 + a9bc553 commit 9785fab
Showing 5 changed files with 75 additions and 44 deletions.
27 changes: 2 additions & 25 deletions doc/Makefile
@@ -1,24 +1,10 @@
SPHINX_OPTS := -j auto

#SPHINX_APIDOC_ARGS := --separate \
# --no-toc \
# --module-first \
# --maxdepth 2 \
# --force \
# --output-dir $(CURDIR)/_apidoc \
# --doc-project 'API Docs'

BUILD_DIR := $(CURDIR)/_build
# SPHINX_APIDOC := PYTHONPATH="$(dir $(CURDIR))/src:${PYTHONPATH:-}" sphinx-apidoc

.PHONY: all
all: html

#.PHONY: apidoc
#apidoc:
# rm -rf $(CURDIR)/_apidoc
# $(SPHINX_APIDOC) $(SPHINX_APIDOC_ARGS) $(dir $(CURDIR))/src/labw_utils

.PHONY: preconfigure
preconfigure: src
python preconfigure.py
@@ -28,21 +14,12 @@ src:
$(MAKE) -C src

.PHONY: html
html: src preconfigure # apidoc refs.bibtex.bib
html: src preconfigure
sphinx-build -M html $(CURDIR) $(BUILD_DIR) $(SPHINX_OPTS)

#refs.bibtex.bib: refs.bib
# biber --tool \
# --configfile=biberConf.xml \
# --output-file refs.bibtex.bib \
# --output-legacy-date \
# --output-field-replace=location:address,journaltitle:journal \
# refs.bib

.PHONY: clean
clean:
rm -rf $(BUILD_DIR)
$(MAKE) -C src clean
rm -rf $(BUILD_DIR) _cli_docs _root

.PHONY: distclean
distclean: clean
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -33,7 +33,7 @@ classifiers = [
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Topic :: Utilities"
]
requires-python = ">=3.7"
requires-python = ">=3.8"
dynamic = ["version", "readme", "dependencies", "optional-dependencies"]

[project.urls]
21 changes: 18 additions & 3 deletions src/yasim_sctcr/_main/generate_tcr_depth.py
@@ -22,13 +22,28 @@ def create_parser() -> argparse.ArgumentParser:
.. versionadded:: 0.1.0
"""
parser = ArgumentParserWithEnhancedFormatHelp(
prog="python -m yasim_sctcr generate_tcr_depth", description=__doc__.splitlines()[1]
prog="python -m yasim_sctcr generate_tcr_depth",
description=__doc__.splitlines()[1],
)
parser = patch_frontend_argument_parser(parser, "-b")
parser.add_argument(
"-o", "--out", required=True, help="Path to output depth TSV", nargs="?", type=str, action="store"
"-o",
"--out",
required=True,
help="Path to output depth TSV",
nargs="?",
type=str,
action="store",
)
parser.add_argument(
"-d",
"--depth",
required=True,
help="Simulated depth",
nargs="?",
type=int,
action="store",
)
parser.add_argument("-d", "--depth", required=True, help="Simulated depth", nargs="?", type=int, action="store")
return parser


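As a quick orientation to the reworked option definitions above, here is a minimal, self-contained sketch of the same two options on a plain argparse.ArgumentParser. ArgumentParserWithEnhancedFormatHelp, patch_frontend_argument_parser, and the -b option it adds are omitted, and the sample values are made up:

    import argparse

    # Minimal stand-in for create_parser(); the real parser uses
    # ArgumentParserWithEnhancedFormatHelp and is further patched with a -b option.
    parser = argparse.ArgumentParser(prog="python -m yasim_sctcr generate_tcr_depth")
    parser.add_argument("-o", "--out", required=True, help="Path to output depth TSV", type=str)
    parser.add_argument("-d", "--depth", required=True, help="Simulated depth", type=int)

    # Hypothetical invocation; --depth is parsed as an int.
    args = parser.parse_args(["-o", "depth.tsv", "-d", "20"])
    print(args.out, args.depth)  # depth.tsv 20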
7 changes: 7 additions & 0 deletions src/yasim_sctcr/_main/rearrange_tcr.py
@@ -13,6 +13,7 @@
import argparse
import json
import os
import random

from labw_utils.commonutils.appender import load_table_appender_class, TableAppenderConfig
from labw_utils.commonutils.importer.tqdm_importer import tqdm
@@ -165,6 +166,8 @@ def rearrange_tcr(

cdr3_insertion_table = Cdr3InsertionTable(cdr3_insertion_table_path)
cdr3_deletion_table = {k: {int(_k): _v for _k, _v in v.items()} for k, v in cdr3_deletion_table.items()}
rng = random.SystemRandom()
num_productive = 0
with get_writer(output_base_path + ".nt.fa") as nt_fasta_writer, load_table_appender_class("TSVTableAppender")(
filename=output_base_path + ".stats",
header=[
@@ -187,6 +190,8 @@
tac=TableAppenderConfig(buffer_size=1024),
) as appender:
for i in tqdm(range(num_tcrs)):
is_productive = rng.random() >= portion_non_productive
num_productive += 1 if is_productive else 0
while True:
try:
cell = TCell.from_gene_names(
@@ -197,6 +202,7 @@
usage_bias_tra=usage_bias_tra,
usage_bias_trb=usage_bias_trb,
tcr_uuid=f"TCR_{hex(i)}",
is_productive=is_productive,
)
except GenerationFailure as e:
# e.fgr.as_tuple()
@@ -219,4 +225,5 @@
]
)
cell.save(os.path.join(output_base_path + ".json.d", cell.tcr_uuid + ".json"))
_lh.info("Generated with %d TCRs where %d are productive", num_tcrs, num_productive)
_lh.info("Finished with %d failures", n_failure)
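For readers following the new num_productive bookkeeping, a self-contained sketch of the per-TCR productivity draw, with a hypothetical portion_non_productive of 0.2 and 10 cells, behaves like this:

    import random

    portion_non_productive = 0.2  # hypothetical value; the real one comes from the caller
    num_tcrs = 10
    rng = random.SystemRandom()

    num_productive = 0
    for i in range(num_tcrs):
        # Same rule as in rearrange_tcr(): a uniform draw in [0, 1) at or above
        # the non-productive portion marks this TCR as productive.
        is_productive = rng.random() >= portion_non_productive
        num_productive += 1 if is_productive else 0

    print(f"Generated {num_tcrs} TCRs where {num_productive} are productive")

With this rule, on average about (1 - portion_non_productive) of the simulated TCRs come out productive.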
62 changes: 47 additions & 15 deletions src/yasim_sctcr/helper/tcr.py
@@ -90,6 +90,7 @@ class FullGenerationRecord:

def __init__(
self,
*,
uuid: str,
trav: str,
traj: str,
@@ -262,13 +263,15 @@
@classmethod
def from_gene_names(
cls,
*,
tcr_genelist: Dict[str, List[str]],
cdr3_deletion_table: Cdr3DeletionTableType,
cdr3_insertion_table: Cdr3InsertionTable,
tcr_cache: Dict[str, TCRTranslationTableType],
usage_bias_tra: Dict[str, int],
usage_bias_trb: Dict[str, int],
tcr_uuid: str,
is_productive: bool,
):
def choose_name_jv(
chain_type: Literal["a", "b"]
@@ -306,12 +309,35 @@ def clip_aa(
raise GenerationFailure(fgr)
return tr_cdr3_tt, trv_tt, trj_tt

def ensure_can_translate(tt: TCRTranslationTableType) -> TCRTranslationTableType:
ret_tt = []
for tt_a in tt:
def clip_ltr(tt: TCRTranslationTableType) -> TCRTranslationTableType:
"""
TODO: This function clips 5' UTRs only. It may also need to clip 3' UTRs.
"""
i = 0
for i in range(len(tt)):
tt_a = tt[i]
if "-" in tt_a[0] or tt_a[1] in ("*", "-") or tt_a[2] in ("*", "-"):
continue
ret_tt.append(tt_a)
else:
break
return tt[i:]

def productive_nonproductive_adjustment(tt: TCRTranslationTableType) -> TCRTranslationTableType:
ret_tt = []
if is_productive:
for tt_a in tt:
if "-" in tt_a[0] or tt_a[1] in ("*", "-") or tt_a[2] in ("*", "-"):
continue
ret_tt.append(tt_a)
else:
for tt_a in tt:
if "-" in tt_a[0] or tt_a[1] == "-" or tt_a[2] == "-":
tt_a[1] = "*"
tt_a[2] = "*"
tt_a[0] = tt_a[0].replace("-", "")
if tt_a[0] == "":
continue
ret_tt.append(tt_a)
return ret_tt

(traj_name, traj_tt), (trav_name, trav_tt) = choose_name_jv("a")
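To make the two new helpers easier to follow, here is a hedged, self-contained sketch of clip_ltr and productive_nonproductive_adjustment on a toy table. The three-field row layout ([codon, aa, aa], with "-" for gaps and "*" for stops) is an assumption about TCRTranslationTableType, not its documented definition, and is_productive is passed explicitly here rather than captured from the enclosing scope as in the diff:

    import copy
    from typing import List

    # Assumed stand-in for TCRTranslationTableType rows: [codon, aa, aa],
    # where "-" marks a gap and "*" a stop codon.
    Row = List[str]

    def clip_ltr(tt: List[Row]) -> List[Row]:
        # Drop leading rows containing gaps or stops (5' clipping), stopping at
        # the first clean row, as in the helper above.
        i = 0
        for i in range(len(tt)):
            row = tt[i]
            if "-" in row[0] or row[1] in ("*", "-") or row[2] in ("*", "-"):
                continue
            else:
                break
        return tt[i:]

    def productive_nonproductive_adjustment(tt: List[Row], is_productive: bool) -> List[Row]:
        ret_tt = []
        if is_productive:
            # Productive chains: silently drop every row carrying a gap or stop.
            for row in tt:
                if "-" in row[0] or row[1] in ("*", "-") or row[2] in ("*", "-"):
                    continue
                ret_tt.append(row)
        else:
            # Non-productive chains: keep gapped rows, but turn their amino acids
            # into stops and strip gap characters from the codon.
            for row in tt:
                if "-" in row[0] or row[1] == "-" or row[2] == "-":
                    row[1] = "*"
                    row[2] = "*"
                    row[0] = row[0].replace("-", "")
                    if row[0] == "":
                        continue
                ret_tt.append(row)
        return ret_tt

    table = [["ATG", "M", "M"], ["A-G", "-", "-"], ["TGG", "W", "W"]]
    print(productive_nonproductive_adjustment(clip_ltr(copy.deepcopy(table)), True))
    # -> [['ATG', 'M', 'M'], ['TGG', 'W', 'W']]
    print(productive_nonproductive_adjustment(clip_ltr(copy.deepcopy(table)), False))
    # -> [['ATG', 'M', 'M'], ['AG', '*', '*'], ['TGG', 'W', 'W']]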
@@ -380,17 +406,18 @@ def clip_nt(
"W" if traj_name.upper() in {"TRAJ33", "TRAJ38", "TRAJ55"} else "F",
)
trb_cdr3_tt, trbv_tt, trbj_tt = clip_aa(trb_cdr3_tt, trbv_tt, trbj_tt, "F")

tra_cdr3_tt = ensure_can_translate(tra_cdr3_tt)
trav_tt = ensure_can_translate(trav_tt)
traj_tt = ensure_can_translate(traj_tt)
trac_tt = ensure_can_translate(trac_tt[:15]) # Reduced C-gene length conserved from empirical data
trb_cdr3_tt = ensure_can_translate(trb_cdr3_tt)
trbv_tt = ensure_can_translate(trbv_tt)
trbj_tt = ensure_can_translate(trbj_tt)
trbc_tt = ensure_can_translate(trbc_tt[:17]) # Reduced C-gene length conserved from empirical data

return cls(
trac_tt = trac_tt[:15]
trbc_tt = trbc_tt[:15]
tra_cdr3_tt = productive_nonproductive_adjustment(clip_ltr(tra_cdr3_tt))
trav_tt = productive_nonproductive_adjustment(clip_ltr(trav_tt))
traj_tt = productive_nonproductive_adjustment(clip_ltr(traj_tt))
trac_tt = productive_nonproductive_adjustment(clip_ltr(trac_tt))
trb_cdr3_tt = productive_nonproductive_adjustment(clip_ltr(trb_cdr3_tt))
trbv_tt = productive_nonproductive_adjustment(clip_ltr(trbv_tt))
trbj_tt = productive_nonproductive_adjustment(clip_ltr(trbj_tt))
trbc_tt = productive_nonproductive_adjustment(clip_ltr(trbc_tt))

retc = cls(
tcr_uuid=tcr_uuid,
trav_tt=trav_tt,
traj_tt=traj_tt,
@@ -407,6 +434,11 @@
trac_tt=trac_tt,
trbc_tt=trbc_tt,
)
if is_productive and ("*" in retc.alpha_aa or "*" in retc.beta_aa):
raise GenerationFailure(fgr)
if not is_productive and not ("*" in retc.alpha_aa or "*" in retc.beta_aa):
raise GenerationFailure(fgr)
return retc

def to_nt_fasta_record(self) -> str:
return "\n".join(
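Finally, a short sketch of the acceptance rule enforced at the end of from_gene_names(): a candidate TCR is kept only when the presence of a stop codon in its translated chains matches the requested productivity, and a mismatch raises GenerationFailure so that the while True loop in rearrange_tcr() retries. The amino-acid strings below are made-up examples:

    def matches_requested_productivity(alpha_aa: str, beta_aa: str, is_productive: bool) -> bool:
        # Productive TCRs must be stop-free in both chains;
        # non-productive TCRs must carry at least one stop.
        has_stop = "*" in alpha_aa or "*" in beta_aa
        return has_stop != is_productive

    assert matches_requested_productivity("CAVRDSNYQLIW", "CASSLGQGAEAFF", is_productive=True)
    assert not matches_requested_productivity("CAVRD*NYQLIW", "CASSLGQGAEAFF", is_productive=True)
    assert matches_requested_productivity("CAVRD*NYQLIW", "CASSLGQGAEAFF", is_productive=False)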
