Skip to content

Commit

Permalink
Neo: tweak neo sample scorer
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed Sep 15, 2021
1 parent ddc0b5f commit 0d063e0
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -60,7 +61,7 @@ public class NeoEpitopeFile
public static final String DELIMITER = ",";
public static final String ITEM_DELIM = ";";
public static final String VAR_INFO_DELIM = ":";
public static final String FUSION_INFO_DELIM = ";";
private static final String FUSION_INFO_DELIM = ";";

protected static final Logger NEO_LOGGER = LogManager.getLogger(NeoEpitopeFile.class);

Expand Down Expand Up @@ -103,11 +104,17 @@ public NeoEpitopeFile(
WildtypeNovelAAMatch = wildtypeNovelAAMatch;
}

private static final String FILE_EXTENSION = ".imu.neo_epitopes.csv";
private static final String FILE_EXTENSION = ".neo.neoepitopes.csv";
private static final String OLD_FILE_EXTENSION = ".imu.neo_epitopes.csv";

/**
 * Forms the neoepitope file path for a sample as
 * {@code basePath + File.separator + sample + extension}.
 *
 * <p>The path built with {@code OLD_FILE_EXTENSION} is checked first and returned if a file
 * exists at that location; otherwise the path built with {@code FILE_EXTENSION} is returned
 * without any existence check.
 *
 * @param basePath directory portion of the path, joined to the sample with {@link File#separator}
 * @param sample sample identifier used as the file name prefix
 * @return the old-extension path when that file exists, else the current-extension path
 */
@NotNull
public static String generateFilename(@NotNull final String basePath, @NotNull final String sample)
{
    final String samplePrefix = basePath + File.separator + sample;
    final String legacyFilename = samplePrefix + OLD_FILE_EXTENSION;

    // prefer a file written under the previous naming convention when one is present
    return Files.exists(Paths.get(legacyFilename)) ? legacyFilename : samplePrefix + FILE_EXTENSION;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public double calcSimilarity(final String allele, final String peptide)
catch(Exception e)
{
NE_LOGGER.error("invalid index({}) of peptide({}) or recogPeptide({}) for recognition similarity calc",
peptide, recogData.Peptide);
i, peptide, recogData.Peptide);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,109 +35,109 @@ public void close()
closeBufferedWriter(mPeptideWriter);
}

/*
private BufferedWriter initNeoepitopeWriter()
private BufferedWriter initPeptideWriter()
{
try
{
final String outputFileName = mConfig.formFilename("neoepitope");
final String outputFileName = mConfig.formFilename("allele_peptide_scores");

BufferedWriter writer = createBufferedWriter(outputFileName, false);
writer.write("SampleId,NeId,VariantType,VariantInfo,GeneName,AminoAcids");
writer.write(",Allele,PeptideCount,MaxLikelihood,SumLikelihood");
writer.write("SampleId,NeId,Allele,Peptide");
writer.write(",Score,Rank,Likelihood,LikelihoodRank,ExpLikelihood,ExpLikelihoodRank,RecognitionSim");
writer.write(",AllelCN,AlleleDisrupted");
writer.write(",TpmSampleUp,TpmSampleDown,TpmCancer,TpmCohort,RnaFrags,RnaDepth");
writer.write(",TpmUp,TpmDown,TpmCancer,TpmCohort,RnaFrags,RnaDepth");
writer.newLine();
return writer;
}
catch (IOException e)
{
NE_LOGGER.error("failed to create neoepitope writer: {}", e.toString());
NE_LOGGER.error("failed to create peptide writer: {}", e.toString());
return null;
}
}

public synchronized void writeNeoData(
final String sampleId, final NeoEpitopeData neoData, final NeoPredictionData allelePredData, final AlleleCoverage alleleCoverage)
public synchronized void writePeptideData(
final String sampleId, final NeoEpitopeData neoData, final NeoPredictionData predData, final AlleleCoverage alleleCoverage)
{
if(mNeoWriter == null)
if(mPeptideWriter == null)
return;

try
{
mNeoWriter.write(String.format("%s,%d,%s,%s,%s,%s",
sampleId, neoData.Id, neoData.VariantType, neoData.VariantInfo, neoData.GeneName, neoData.AminoAcids));
for(BindData bindData : predData.getPeptidePredictions(alleleCoverage.Allele))
{
if(bindData.likelihoodRank() > mConfig.LikelihoodThreshold)
continue;

mNeoWriter.write(String.format(",%s,%d,%.4f,%.4f",
alleleCoverage.Allele, allelePredData.Peptides, allelePredData.MaxLikelihood, allelePredData.SumLikelihood));
mPeptideWriter.write(String.format("%s,%d,%s,%s", sampleId, neoData.Id, bindData.Allele, bindData.Peptide));

mNeoWriter.write(String.format(",%.2f,%s", alleleCoverage.CopyNumber, alleleCoverage.isLost()));
mPeptideWriter.write(String.format(",%.4f,%.6f,%.6f,%.6f,%.6f,%.6f,%.1f",
bindData.score(), bindData.rankPercentile(), bindData.likelihood(), bindData.likelihoodRank(),
bindData.expressionLikelihood(), bindData.expressionLikelihoodRank(), bindData.recognitionSimilarity()));

mNeoWriter.write(String.format(",%4.3e,%4.3e,%4.3e,%4.3e,%d,%.0f",
neoData.TransExpression[FS_UP], neoData.TransExpression[FS_DOWN], neoData.TpmCancer, neoData.TpmCohort,
neoData.RnaNovelFragments, (neoData.RnaBaseDepth[SE_START] + neoData.RnaBaseDepth[SE_END]) * 0.5));
mPeptideWriter.write(String.format(",%.2f,%s", alleleCoverage.CopyNumber, alleleCoverage.isLost()));

mNeoWriter.newLine();
mPeptideWriter.write(String.format(",%4.3e,%4.3e,%4.3e,%4.3e,%d,%.0f",
neoData.TransExpression[FS_UP], neoData.TransExpression[FS_DOWN], neoData.TpmCancer, neoData.TpmCohort,
neoData.RnaNovelFragments, (neoData.RnaBaseDepth[SE_START] + neoData.RnaBaseDepth[SE_END]) * 0.5));

mPeptideWriter.newLine();
}
}
catch (IOException e)
{
NE_LOGGER.error("failed to write neo-epitope data: {}", e.toString());
NE_LOGGER.error("failed to write peptide data: {}", e.toString());
}
}
*/

private BufferedWriter initPeptideWriter()
/*
private BufferedWriter initNeoepitopeWriter()
{
try
{
final String outputFileName = mConfig.formFilename("allele_peptide");
final String outputFileName = mConfig.formFilename("neoepitope");
BufferedWriter writer = createBufferedWriter(outputFileName, false);
writer.write("SampleId,NeId,Allele,Peptide");
writer.write(",Score,Rank,Likelihood,LikelihoodRank,ExpLikelihood,ExpLikelihoodRank,RecognitionSim");
writer.write("SampleId,NeId,VariantType,VariantInfo,GeneName,AminoAcids");
writer.write(",Allele,PeptideCount,MaxLikelihood,SumLikelihood");
writer.write(",AllelCN,AlleleDisrupted");
writer.write(",TpmUp,TpmDown,TpmCancer,TpmCohort,RnaFrags,RnaDepth");
writer.write(",TpmSampleUp,TpmSampleDown,TpmCancer,TpmCohort,RnaFrags,RnaDepth");
writer.newLine();
return writer;
}
catch (IOException e)
{
NE_LOGGER.error("failed to create peptide writer: {}", e.toString());
NE_LOGGER.error("failed to create neoepitope writer: {}", e.toString());
return null;
}
}
public synchronized void writePeptideData(
final String sampleId, final NeoEpitopeData neoData, final NeoPredictionData predData, final AlleleCoverage alleleCoverage)
public synchronized void writeNeoData(
final String sampleId, final NeoEpitopeData neoData, final NeoPredictionData allelePredData, final AlleleCoverage alleleCoverage)
{
if(mPeptideWriter == null)
if(mNeoWriter == null)
return;
try
{
for(BindData bindData : predData.getPeptidePredictions(alleleCoverage.Allele))
{
if(bindData.likelihoodRank() > mConfig.LikelihoodThreshold)
continue;

mPeptideWriter.write(String.format("%s,%d,%s,%s", sampleId, neoData.Id, bindData.Allele, bindData.Peptide));
mNeoWriter.write(String.format("%s,%d,%s,%s,%s,%s",
sampleId, neoData.Id, neoData.VariantType, neoData.VariantInfo, neoData.GeneName, neoData.AminoAcids));
mPeptideWriter.write(String.format(",%.4f,%.6f,%.6f,%.6f,%.6f,%.6f,%.1f",
bindData.score(), bindData.rankPercentile(), bindData.likelihood(), bindData.likelihoodRank(),
bindData.expressionLikelihood(), bindData.expressionLikelihoodRank(), bindData.recognitionSimilarity()));
mNeoWriter.write(String.format(",%s,%d,%.4f,%.4f",
alleleCoverage.Allele, allelePredData.Peptides, allelePredData.MaxLikelihood, allelePredData.SumLikelihood));
mPeptideWriter.write(String.format(",%.2f,%s", alleleCoverage.CopyNumber, alleleCoverage.isLost()));
mNeoWriter.write(String.format(",%.2f,%s", alleleCoverage.CopyNumber, alleleCoverage.isLost()));
mPeptideWriter.write(String.format(",%4.3e,%4.3e,%4.3e,%4.3e,%d,%.0f",
neoData.TransExpression[FS_UP], neoData.TransExpression[FS_DOWN], neoData.TpmCancer, neoData.TpmCohort,
neoData.RnaNovelFragments, (neoData.RnaBaseDepth[SE_START] + neoData.RnaBaseDepth[SE_END]) * 0.5));
mNeoWriter.write(String.format(",%4.3e,%4.3e,%4.3e,%4.3e,%d,%.0f",
neoData.TransExpression[FS_UP], neoData.TransExpression[FS_DOWN], neoData.TpmCancer, neoData.TpmCohort,
neoData.RnaNovelFragments, (neoData.RnaBaseDepth[SE_START] + neoData.RnaBaseDepth[SE_END]) * 0.5));
mPeptideWriter.newLine();
}
mNeoWriter.newLine();
}
catch (IOException e)
{
NE_LOGGER.error("failed to write peptide data: {}", e.toString());
NE_LOGGER.error("failed to write neo-epitope data: {}", e.toString());
}
}
*/
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,12 @@ public class NeoScorer
private final NeoScorerConfig mConfig;
private final NeoDataWriter mWriters;
private final BindScorer mPeptideScorer;
private final RnaExpressionMatrix mTranscriptExpression;

public NeoScorer(final CommandLine cmd)
{
mConfig = new NeoScorerConfig(cmd);

mPeptideScorer = new BindScorer(new ScoreConfig(cmd));
mTranscriptExpression = new RnaExpressionMatrix(cmd.getOptionValue(SAMPLE_TRANS_EXP_FILE), EXPRESSION_SCOPE_TRANS);

mWriters = new NeoDataWriter(mConfig);
}
Expand All @@ -50,13 +48,18 @@ public void run()
System.exit(1);
}

NE_LOGGER.info("running neoepitope scoring for {}",
mConfig.SampleIds.size() == 1 ? mConfig.SampleIds.get(0) : String.format("%d samples", mConfig.SampleIds.size()));

RnaExpressionMatrix transcriptExpression = new RnaExpressionMatrix(mConfig.SampleTranscriptExpressionFile, EXPRESSION_SCOPE_TRANS);

NE_LOGGER.info("processing {} samples", mConfig.SampleIds.size());

List<NeoScorerTask> sampleTasks = Lists.newArrayList();

for(String sampleId : mConfig.SampleIds)
{
NeoScorerTask sampleTask = new NeoScorerTask(sampleId, mConfig, mPeptideScorer, mTranscriptExpression, mWriters);
NeoScorerTask sampleTask = new NeoScorerTask(sampleId, mConfig, mPeptideScorer, transcriptExpression, mWriters);

sampleTasks.add(sampleTask);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@

public class NeoScorerConfig
{
public final String OutputDir;
public final String OutputId;
public final String NeoDataDir;
public final String LilacDataDir;
public final String IsofoxDataDir;
public final String SampleTranscriptExpressionFile;

public final List<String> SampleIds;

public final String OutputDir;
public final String OutputId;
public final List<OutputType> WriteTypes;

public final double LikelihoodThreshold;
Expand All @@ -50,7 +52,9 @@ public NeoScorerConfig(final CommandLine cmd)
LilacDataDir = cmd.getOptionValue(LILAC_DATA_DIR);
IsofoxDataDir = cmd.getOptionValue(ISF_DATA_DIR);

LikelihoodThreshold = Double.parseDouble(cmd.getOptionValue(LIKELIHOOD_THRESHOLD, "0.001"));
SampleTranscriptExpressionFile = cmd.getOptionValue(SAMPLE_TRANS_EXP_FILE);

LikelihoodThreshold = Double.parseDouble(cmd.getOptionValue(LIKELIHOOD_THRESHOLD, "0.02"));

OutputDir = parseOutputDir(cmd);
OutputId = cmd.getOptionValue(OUTPUT_ID);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.hartwig.hmftools.neo.cohort;

import static com.hartwig.hmftools.common.codon.AminoAcidRna.AA_SELENOCYSTEINE;
import static com.hartwig.hmftools.common.fusion.FusionCommon.FS_DOWN;
import static com.hartwig.hmftools.common.fusion.FusionCommon.FS_UP;
import static com.hartwig.hmftools.common.rna.RnaExpressionMatrix.INVALID_EXP;
Expand Down Expand Up @@ -198,6 +199,9 @@ private NeoPredictionData produceAllelePeptides(final NeoEpitopeData neoData, fi

for(PeptideData peptideData : peptides)
{
if(peptideData.Peptide.contains(AA_SELENOCYSTEINE))
continue;

BindData bindData = new BindData(
allele.Allele, peptideData.Peptide, "", peptideData.UpFlank, peptideData.DownFlank);

Expand Down

0 comments on commit 0d063e0

Please sign in to comment.