Skip to content

Commit

Permalink
core alignment process
Browse files Browse the repository at this point in the history
  • Loading branch information
HongzheGuo committed Aug 19, 2015
1 parent 0e05340 commit adf74ef
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 66 deletions.
50 changes: 25 additions & 25 deletions load_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,35 +167,35 @@ static int index_build_usage()
}

static int load_input_usage()
{
{
fprintf(stderr, "\n");
fprintf(stderr, "Program: De Brijn Graph mapping system seed reduction and alignment\n");
fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
fprintf(stderr, "Contact: Hongzhe Guo <[email protected]>\n\n");
fprintf(stderr, "Usage: deBGA aln [options] <index_route> <read pair-end1.fq> [read pair-end2.fq] <result_file.sam>\n\n");
fprintf(stderr, "Options: -k INT length of kmer [21-28]\n");
fprintf(stderr, " -u INT upper limit of library insert-size [%u] (only for pair-end)\n", upper_ins);
fprintf(stderr, " -f INT floor limit of library insert-size [%u] (only for pair-end)\n", floor_ins);
fprintf(stderr, " -o INT the maximum output number (only for pair-end)[%u]\n", cus_ali_n);
fprintf(stderr, " -v NUM the max editing distance rate for LandauVishkin [%.2f] (only for pair-end)\n", lv_rate);
fprintf(stderr, " -p INT number of threads [%u]\n", thread_n);
fprintf(stderr, " -l INT the maximum read length [%u]\n", readlen_max);
fprintf(stderr, " -r INT seed coverage length filter [%u]\n", length_reduce);
fprintf(stderr, " -i INT minimum seeds interval on read [%u]\n", seed_step);
fprintf(stderr, " -s INT number of seed circulation [%u]\n", cir_fix_n);
fprintf(stderr, " -n INT seed positions filter [%u]\n", pos_n_max);
fprintf(stderr, " -x INT the maximum output number of anchor [%u]\n", cus_max_output_ali);
fprintf(stderr, " -c NUM the max rate of alignment score [%.2f]\n", max_pair_score_r);
fprintf(stderr, " -e INT the filter number of seed positions [%u]\n", seed_filter_pos_numn);
//fprintf(stderr, " --aanchor NUM the max rate of mismatch alignment on anchor [%.2f]\n", mis_match_r_single);
//fprintf(stderr, " --vanchor NUM the max editing distance rate for LandauVishkin on anchor [%.2f]\n", lv_rate_anchor);
fprintf(stderr, " --cl NUM the max rate of alignment score on the last circle [%.2f]\n", last_circle_rate);
fprintf(stderr, " --local use the ksw local alignment\n");
fprintf(stderr, " --mg use the mode of multi-genomes\n");
fprintf(stderr, " --local-match NUM local alignment score match [%d]\n", mat_score);
fprintf(stderr, " --local-mismatch NUM local alignment score match [%d]\n", mis_score);
fprintf(stderr, " --local-gap-open NUM local alignment score match [%d]\n", gapo_score);
fprintf(stderr, " --local-gap-extension NUM local alignment score match [%d]\n", gape_score);
fprintf(stderr, "Usage: deBGA aln [options] <index_route> <read pair-end1.fq> [read pair-end2.fq] <result_file.sam>\n\nOptions:\n");
fprintf(stderr, " -k INT the minimum length of a valid Uni-MEM seed [21-28]\n");
fprintf(stderr, " -u INT the upper limit of insert size (only for pair-end reads) [%u] \n", upper_ins);
fprintf(stderr, " -f INT the lower limit of insert size (only for pair-end reads) [%u] \n", floor_ins);
fprintf(stderr, " -o INT the maximum number of alignment output [%u]\n", cus_ali_n);
//fprintf(stderr, " -v NUM the max editing distance rate for LandauVishkin [%.2f] (only for pair-end)\n", lv_rate);
fprintf(stderr, " -p INT the number of threads [%u]\n", thread_n);
fprintf(stderr, " -l INT the maximum allowed read length [%u]\n", readlen_max);
//fprintf(stderr, " -r INT seed coverage length filter [%u]\n", length_reduce);
fprintf(stderr, " -i INT the minimum interval of seeding [%u]\n", seed_step);
fprintf(stderr, " -s INT the number of rounds of re-seeding [%u]\n", cir_fix_n);
fprintf(stderr, " -n INT the maximum allowed number of hits per seed [%u]\n", pos_n_max);
fprintf(stderr, " -x INT the maximum number of alignment output for anchoring alignment [%u]\n", cus_max_output_ali);
fprintf(stderr, " -c NUM the threshold of confident alignment [%.2f]\n", max_pair_score_r);
fprintf(stderr, " -e INT the budget for single-end alignment [%u]\n", seed_filter_pos_numn);
//fprintf(stderr, " --aanchor NUM the max rate of mismatch alignment on anchor [%.2f]\n", mis_match_r_single);
//fprintf(stderr, " --vanchor NUM the max editing distance rate for LandauVishkin on anchor [%.2f]\n", lv_rate_anchor);
fprintf(stderr, " --cl NUM the adjusted threshold of confident alignment [%.2f]\n", last_circle_rate);
fprintf(stderr, " --local the local alignment option for confident alignment\n");
//fprintf(stderr, " --mg use the mode of multi-genomes\n");
fprintf(stderr, " --local-match NUM the score for a matched base in the local alignment [%d]\n", mat_score);
fprintf(stderr, " --local-mismatch NUM the penalty for a mismatched base in the local alignment [%d]\n", mis_score);
fprintf(stderr, " --local-gap-open NUM the penalty for a gap open in the local alignment [%d]\n", gapo_score);
fprintf(stderr, " --local-gap-extension NUM the penalty for gap extension in the local alignment [%d]\n", gape_score);
fprintf(stderr, "\n");

return 1;
Expand Down
4 changes: 2 additions & 2 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ static int usage()
fprintf(stderr, "Version: %s\n", COMMAND_VERSION);
fprintf(stderr, "Contact: Hongzhe Guo <[email protected]>\n\n");
fprintf(stderr, "Usage: deBGA <command> [options]\n\n");
fprintf(stderr, "Command: index index sequences in the FASTA format\n");
fprintf(stderr, " aln pair-end and single-end reads seed reduction and alignment based on De bruijn graph\n");
fprintf(stderr, "Command: index index sequences in the FASTA format\n");
fprintf(stderr, " aln pair-end and single-end reads seed reduction and alignment based on De bruijn graph\n");

return 1;
}
Expand Down
111 changes: 72 additions & 39 deletions seed_ali_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2178,8 +2178,10 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)

f_cigar[f_cigarn] = '\0';

sprintf(cigar_m1[tid],"%uM\0",read_length1);
sprintf(cigar_m2[tid],"%uM\0",read_length2);
//sprintf(cigar_m1[tid],"%uM\0",read_length1);
//sprintf(cigar_m2[tid],"%uM\0",read_length2);
sprintf(cigar_m1[tid],"%uM",read_length1);
sprintf(cigar_m2[tid],"%uM",read_length2);

//for bit operation
read_bit_char1 = (((uint16_t )((read_length_a1 >> 5) + 1)) << 3);
Expand Down Expand Up @@ -4547,6 +4549,36 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)

dm12 = dm1 + dm2;

//for debug tha
/*
x = mat_pos1[tid][r_i];
low = 0;
high = chr_file_n - 1;

while ( low <= high )
{
mid = (low + high) >> 1;
if(x < (chr_end_n[mid]))
{
high = mid - 1;
}
else if(x > (chr_end_n[mid]))
{
low = mid + 1;
}
else
{
chr_re = mid;
break;
}
chr_re = low;
}

sam_pos1 = x - chr_end_n[chr_re - 1] + 1;

printf("%u add: %u %s %d %d\n", chr_file_n, sam_pos1, chr_names[chr_re], dm12, dm_op[tid]);
*/

if(dm12 < dm_op[tid])
{
#ifdef DM_COPY_PAIR
Expand Down Expand Up @@ -6608,7 +6640,7 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sprintf(cigar_p1 + snt, "\0");
//sprintf(cigar_p1 + snt, "\0");
}
#ifdef NO_S_OFF
s_offset1 = 0;
Expand Down Expand Up @@ -6848,7 +6880,7 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sprintf(cigar_p2 + snt, "\0");
//sprintf(cigar_p2 + snt, "\0");
}
#ifdef NO_S_OFF
s_offset2 = 0;
Expand Down Expand Up @@ -7506,7 +7538,7 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sprintf(cigar_p1 + snt, "\0");
//sprintf(cigar_p1 + snt, "\0");
}
#ifdef NO_S_OFF
s_offset1 = 0;
Expand Down Expand Up @@ -7750,7 +7782,7 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sprintf(cigar_p2 + snt, "\0");
//sprintf(cigar_p2 + snt, "\0");
}
#ifdef NO_S_OFF
s_offset2 = 0;
Expand Down Expand Up @@ -8309,7 +8341,7 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sprintf(cigar_p1 + snt, "\0");
//sprintf(cigar_p1 + snt, "\0");
}
#ifdef NO_S_OFF
s_offset1 = 0;
Expand Down Expand Up @@ -8551,7 +8583,7 @@ int seed_ali_core(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sprintf(cigar_p2 + snt, "\0");
//sprintf(cigar_p2 + snt, "\0");
}
#ifdef NO_S_OFF
s_offset2 = 0;
Expand Down Expand Up @@ -10412,8 +10444,8 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
sn = sprintf(cigar_p1 + snt, "%dS", read_length1 - s_r_o_r - op_dm_kr1[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;

if (n_cigar1) free(cigar1);
if (n_cigar2) free(cigar2);
Expand Down Expand Up @@ -10713,7 +10745,7 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
snt += sn;
}
}
sprintf(cigar_p1 + snt, "\0");
//sprintf(cigar_p1 + snt, "\0");
}
}

Expand Down Expand Up @@ -10964,8 +10996,8 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
sn = sprintf(cigar_p2 + snt, "%dS", read_length2 - s_r_o_r - op_dm_kr2[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p2 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p2 + snt, "\0");
//snt += sn;

if (n_cigar1) free(cigar1);
if (n_cigar2) free(cigar2);
Expand Down Expand Up @@ -11275,7 +11307,7 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
snt += sn;
}
}
sprintf(cigar_p2 + snt, "\0");
//sprintf(cigar_p2 + snt, "\0");
}
}
#ifdef MAPPING_QUALITY
Expand Down Expand Up @@ -11676,8 +11708,8 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
sn = sprintf(cigar_p1 + snt, "%dS", read_length1 - s_r_o_r - op_dm_kr1[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;

if (n_cigar1) free(cigar1);
if (n_cigar2) free(cigar2);
Expand Down Expand Up @@ -11962,7 +11994,7 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
snt += sn;
}
}
sprintf(cigar_p1 + snt, "\0");
//sprintf(cigar_p1 + snt, "\0");
}
}

Expand Down Expand Up @@ -12180,8 +12212,8 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
sn = sprintf(cigar_p2 + snt, "%dS", read_length2 - s_r_o_r - op_dm_kr2[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p2 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p2 + snt, "\0");
//snt += sn;

if (n_cigar1) free(cigar1);
if (n_cigar2) free(cigar2);
Expand Down Expand Up @@ -12467,7 +12499,7 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
snt += sn;
}
}
sprintf(cigar_p2 + snt, "\0");
//sprintf(cigar_p2 + snt, "\0");
}
}

Expand Down Expand Up @@ -12875,8 +12907,8 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
sn = sprintf(cigar_p1 + snt, "%dS", read_length1 - s_r_o_r - ops_dm_kr1[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;

if (n_cigar1) free(cigar1);
if (n_cigar2) free(cigar2);
Expand Down Expand Up @@ -13194,7 +13226,7 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
snt += sn;
}
}
sprintf(cigar_p1 + snt, "\0");
//sprintf(cigar_p1 + snt, "\0");
}
}

Expand Down Expand Up @@ -13448,8 +13480,8 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
sn = sprintf(cigar_p2 + snt, "%dS", read_length2 - s_r_o_r - ops_dm_kr2[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p2 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p2 + snt, "\0");
//snt += sn;

if (n_cigar1) free(cigar1);
if (n_cigar2) free(cigar2);
Expand Down Expand Up @@ -13766,7 +13798,7 @@ void pair_sam_output(uint8_t tid, uint16_t read_length1, uint16_t read_length2,
snt += sn;
}
}
sprintf(cigar_p2 + snt, "\0");
//sprintf(cigar_p2 + snt, "\0");
}

}
Expand Down Expand Up @@ -16128,7 +16160,8 @@ int seed_ali_core_single_end(uint32_t seqn, uint8_t tid)

f_cigar[f_cigarn] = '\0';

sprintf(cigar_m[tid],"%uM\0",read_length);
//sprintf(cigar_m[tid],"%uM\0",read_length);
sprintf(cigar_m[tid],"%uM",read_length);

//for bit operation
read_bit_char = (((uint16_t )((read_length_a >> 5) + 1)) << 3);
Expand Down Expand Up @@ -17207,8 +17240,8 @@ int seed_ali_core_single_end(uint32_t seqn, uint8_t tid)
sn = sprintf(cigar_p1 + snt, "%dS", read_length -s_r_o_r - op_dm_kr1[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;

if(n_cigar1) free(cigar1);
if(n_cigar2) free(cigar2);
Expand Down Expand Up @@ -17416,8 +17449,8 @@ int seed_ali_core_single_end(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;
}
#ifdef NO_S_OFF
s_offset1 = 0;
Expand Down Expand Up @@ -17716,8 +17749,8 @@ int seed_ali_core_single_end(uint32_t seqn, uint8_t tid)
sn = sprintf(cigar_p1 + snt, "%dS", read_length - s_r_o_r - op_dm_kr1[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;

lv_re1f = nm_score;

Expand Down Expand Up @@ -17898,8 +17931,8 @@ int seed_ali_core_single_end(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;
}
sam_pos1 = sam_pos1 + i_n1 - d_n1;
}
Expand Down Expand Up @@ -18232,8 +18265,8 @@ int seed_ali_core_single_end(uint32_t seqn, uint8_t tid)
sn = sprintf(cigar_p1 + snt, "%dS", read_length - s_r_o_r - ops_dm_kr1[tid][v_cnt_i]);
snt += sn;
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;

lv_re1f = nm_score;

Expand Down Expand Up @@ -18443,8 +18476,8 @@ int seed_ali_core_single_end(uint32_t seqn, uint8_t tid)
snt += sn;
}
}
sn = sprintf(cigar_p1 + snt, "\0");
snt += sn;
//sn = sprintf(cigar_p1 + snt, "\0");
//snt += sn;
}
sam_pos1 = sam_pos1 + i_n1 - d_n1;
}
Expand Down

0 comments on commit adf74ef

Please sign in to comment.