Skip to content

Commit

Permalink
evaly mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Liang Huang committed May 24, 2024
1 parent a730969 commit ba15b61
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
4 changes: 3 additions & 1 deletion linearpartition
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def setgflags():
flags.DEFINE_string('shape', '', "import SHAPE data for SHAPE guided LinearPartition (DEFAULT: not use SHAPE data)") # SHAPE
flags.DEFINE_boolean('fasta', False, "input is in fasta format") # FASTA format
flags.DEFINE_integer('dangles', 2, "the way to treat `dangling end' energies for bases adjacent to helices in free ends and multi-loops (only supporting `0' or `2', default=`2')", short_name="d")
flags.DEFINE_string('evaly', "", "batch eval all sequences against structure for p(y|x)", short_name='y') # p(y|x)
argv = FLAGS(sys.argv)

def main():
Expand All @@ -50,6 +51,7 @@ def main():
shape_file_path = str(FLAGS.shape)
is_fasta = '1' if FLAGS.fasta else '0'
dangles = str(FLAGS.dangles)
evaly = FLAGS.evaly

if FLAGS.p and (FLAGS.o or FLAGS.prefix):
print("\nWARNING: -p mode has no output for base pairing probability matrix!\n");
Expand Down Expand Up @@ -81,7 +83,7 @@ def main():
exit();

path = os.path.dirname(os.path.abspath(__file__))
cmd = ["%s/%s" % (path, ('bin/linearpartition_v' if use_vienna else 'bin/linearpartition_c')), beamsize, is_sharpturn, is_verbose, bpp_file, bpp_prefix, pf_only, bpp_cutoff, forest_file, mea, gamma, TK, threshold, ThreshKnot_prefix, MEA_prefix, MEA_bpseq, shape_file_path, is_fasta, dangles]
cmd = ["%s/%s" % (path, ('bin/linearpartition_v' if use_vienna else 'bin/linearpartition_c')), beamsize, is_sharpturn, is_verbose, bpp_file, bpp_prefix, pf_only, bpp_cutoff, forest_file, mea, gamma, TK, threshold, ThreshKnot_prefix, MEA_prefix, MEA_bpseq, shape_file_path, is_fasta, dangles, evaly]
subprocess.call(cmd, stdin=sys.stdin)

if __name__ == '__main__':
Expand Down
26 changes: 20 additions & 6 deletions src/LinearPartition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include "Utils/utility_v.h"
#include "bpp.cpp"

#include "LinearFoldEval.h" // for p(y|x)

#define SPECIAL_HP

using namespace std;
Expand Down Expand Up @@ -95,7 +97,7 @@ void BeamCKYParser::postprocess() {
delete[] nucs;
}

void BeamCKYParser::parse(string& seq) {
double BeamCKYParser::parse(string& seq) {

struct timeval parse_starttime, parse_endtime;

Expand Down Expand Up @@ -441,10 +443,13 @@ void BeamCKYParser::parse(string& seq) {
gettimeofday(&parse_endtime, NULL);
double parse_elapsed_time = parse_endtime.tv_sec - parse_starttime.tv_sec + (parse_endtime.tv_usec-parse_starttime.tv_usec)/1000000.0;

double ensemble;
#ifdef lpv
fprintf(stderr,"Free Energy of Ensemble: %.5f kcal/mol\n", -kT * viterbi.alpha / 100.0);
ensemble = -kT * viterbi.alpha / 100.0; // -kT log(Q(x))
fprintf(stderr,"Free Energy of Ensemble: %.5f kcal/mol\n", ensemble);
#else
fprintf(stderr,"Log Partition Coefficient: %.5f\n", viterbi.alpha);
ensemble = viterbi.alpha;
fprintf(stderr,"Log Partition Coefficient: %.5f\n", ensemble);
#endif

if(is_verbose) fprintf(stderr,"Partition Function Calculation Time: %.2f seconds.\n", parse_elapsed_time);
Expand All @@ -465,7 +470,7 @@ void BeamCKYParser::parse(string& seq) {
if (threshknot_) ThreshKnot(seq);
}
postprocess();
return;
return ensemble;
}

void BeamCKYParser::print_states(FILE *fptr, unordered_map<int, State>& states, int j, string label, bool inside_only, double threshold) {
Expand Down Expand Up @@ -605,6 +610,7 @@ int main(int argc, char** argv){
string ThresKnot_prefix;
bool fasta = false;
int dangles = 2;
string ystruct = ""; // p(y|x)

// SHAPE
string shape_file_path = "";
Expand All @@ -628,6 +634,7 @@ int main(int argc, char** argv){
shape_file_path = argv[16];
fasta = atoi(argv[17]) == 1;
dangles = atoi(argv[18]);
ystruct = argv[19]; // for p(y|x)
}

if (is_verbose) printf("beam size: %d\n", beamsize);
Expand Down Expand Up @@ -661,7 +668,7 @@ int main(int argc, char** argv){
}
if (!rna_seq.empty())
rna_seq_list.push_back(rna_seq);
}else{
} else {
for (string seq; getline(cin, seq);){
if (seq.empty()) continue;
if (seq[0] == '>' or seq[0] == ';') continue;
Expand All @@ -673,6 +680,7 @@ int main(int argc, char** argv){
}
}

// TODO: no need to store all seqs
for(int i = 0; i < rna_seq_list.size(); i++){
if (rna_name_list.size() > i)
printf("%s\n", rna_name_list[i].c_str());
Expand Down Expand Up @@ -706,7 +714,13 @@ int main(int argc, char** argv){
// lhuang: moved inside loop, fixing an obscure but crucial bug in initialization
BeamCKYParser parser(beamsize, !sharpturn, is_verbose, bpp_file, bpp_file_index, pf_only, bpp_cutoff, forest_file, mea, MEA_gamma, MEA_file_index, MEA_bpseq, ThreshKnot, ThreshKnot_threshold, ThreshKnot_file_index, shape_file_path, fasta, dangles);

parser.parse(rna_seq);
double ensemble = parser.parse(rna_seq); // ensemble free energy

if (!ystruct.empty()) {
double energy = -eval(rna_seq, ystruct, is_verbose, dangles)/100.; // LinearFoldEval.h
double prob = exp(100*(energy-ensemble)/ -kT);
printf("x= %s\ty= %s\tDeltaG(x,y)= %.2f\t-kTlogQ(x)= %.5f\tp(y|x)= %.5f\n", rna_seq.c_str(), ystruct.c_str(), energy, ensemble, prob);
}
}

gettimeofday(&total_endtime, NULL);
Expand Down
2 changes: 1 addition & 1 deletion src/LinearPartition.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class BeamCKYParser {
int dangles=1);

// DecoderResult parse(string& seq);
void parse(string& seq);
double parse(string& seq);

private:
void get_parentheses(char* result, string& seq);
Expand Down

0 comments on commit ba15b61

Please sign in to comment.