Skip to content

Commit

Permalink
tgs sample slave, somtimes RE ?
Browse files Browse the repository at this point in the history
  • Loading branch information
yanlifeng committed Dec 16, 2022
1 parent 81f84d4 commit 7fbc877
Show file tree
Hide file tree
Showing 20 changed files with 1,308 additions and 295 deletions.
38 changes: 29 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
PRINTDEBUG := 0
InstructSet :=

$(info )

DIR_INC := ./inc
DIR_SRC := ./src
SLAVE_DIR_SRC := ./slave
DIR_OBJ := ./obj


PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
INCLUDE_DIRS ?=
Expand All @@ -19,6 +22,10 @@ SRC2 := $(wildcard ${DIR_SRC}/*.c)
OBJ += $(patsubst %.c,${DIR_OBJ}/%.o,$(notdir ${SRC2}))


SRC3 := $(wildcard ${SLAVE_DIR_SRC}/*.cpp)
OBJ += $(patsubst %.cpp,${DIR_OBJ}/%.o,$(notdir ${SRC3}))


TARGET := RabbitQCPlus

BIN_TARGET := ${TARGET}
Expand All @@ -28,29 +35,42 @@ CXX = sw5g++


CXXFLAGS := $(InstructSet)
CXXFLAGS += -DNDUBUG -std=c++11 -I./ -I./common -g -O3 -w -fopenmp
CXXFLAGS += -DVerbose -std=c++11 -I./ -I./common -g -O3 -w -fopenmp


CXX2 = sw5gcc
CXXFLAGS2 := -g -O3 -w -Wextra -Wno-unknown-pragmas -Wcast-qual

LIBS := -static -lz -Wl,-z,muldefs -lpthread -fopenmp -lrt
LIBS := -static -lz -Wl,-z,muldefs -lpthread -fopenmp -lrt


LD_FLAGS := $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(LIBS)




${BIN_TARGET}:${OBJ}
$(CXX) $(OBJ) -o $@ $(LD_FLAGS)
python3 xlink.py $(CXX) -mhybrid $^ -o $@ $(LD_FLAGS)

${DIR_OBJ}/%.o:${DIR_SRC}/%.cpp
$(CXX) -c $< -o $@ $(CXXFLAGS)
$(CXX) -mhost -msimd -c $< -o $@ $(CXXFLAGS)
${DIR_OBJ}/%.o:${SLAVE_DIR_SRC}/%.cpp
$(CXX) -mslave -msimd -c $< -o $@ $(CXXFLAGS)
${DIR_OBJ}/%.o:${DIR_SRC}/%.c
$(CXX2) -mhost -msimd -c $< -o $@ $(CXXFLAGS2)


${DIR_OBJ}/%.o:${DIR_SRC}/%.c
$(CXX2) $(CXXFLAGS2) -c $< -o $@

#${BIN_TARGET}:${OBJ}
# python3 xlink.py $(CXX) -mhybrid $^ -o $@ $(LD_FLAGS)
#
#${DIR_OBJ}/%.o:${DIR_SRC}/%.cpp
# $(CXX) -mhost -msimd -c $< -o $@ $(CXXFLAGS)
#
#${DIR_OBJ}/%.o:${SLAVE_DIR_SRC}/%.cpp
# $(CXX) -mslave -msimd -c $< -o $@ $(CXXFLAGS)
#
#${DIR_OBJ}/%.o:${DIR_SRC}/%.c
# $(CXX2) -mhost -msimd -c $< -o $@ $(CXXFLAGS2)
#


.PHONY:clean
clean:
Expand Down
1 change: 1 addition & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bsub -b -I -q q_sw_expr -n 1 -cgsp 64 -sw3run swrun-5a -share_size 4096 ./RabbitQCPlus --help
39 changes: 39 additions & 0 deletions slave/Reference.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef __REFERENCE_H_
#define __REFERENCE_H_

/**
* @brief Reference struct that store the FASTA and FASTQ infomation
*/
struct Reference {
std::string name;
std::string comment;
std::string seq;
std::string quality;
std::string strand;
uint64_t length;
uint64_t gid;
};

/**
* @brief Reference struct that store the FASTA and FASTQ infomation, different from Reference
* neoReference only record the start position and length of name, sequence, strand and quality
* base on certain chunk data `base`
*/
struct neoReference {
uint64_t pname; /// name offset form base
uint64_t pcom; /// comment offset form base
uint64_t pseq; /// sequence offset form base
uint64_t pqual; /// quality offset form base
uint64_t pstrand;///strand offset form base

uint64_t lname; /// length of name
uint64_t lcom; /// length of comment
uint64_t lseq; /// length of sequence
uint64_t lqual; /// length of quality
uint64_t lstrand; /// length of strand
uint64_t gid; /// global id
unsigned char *base;/// base data pointer
};


#endif
112 changes: 112 additions & 0 deletions slave/cmdinfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
//
// Created by ylf9811 on 2021/7/6.
//

#ifndef RERABBITQC_CMDINFO_H
#define RERABBITQC_CMDINFO_H

class CmdInfo {
public:
CmdInfo();

public:
std::string in_file_name1_;
std::string in_file_name2_;
std::string out_file_name1_;
std::string out_file_name2_;
int thread_number_;
std::string command_;
bool isPhred64_;
bool isStdin_;
bool isStdout_;

bool overWrite_;
bool write_data_;
int64_t out_block_size_;
int64_t in_file_size1_;
int64_t in_file_size2_;
int seq_len_;
int qul_range_;

//param for filter
int length_required_;
int length_limit_;
int n_number_limit_;
int low_qual_perc_limit_;
bool trim_5end_;
bool trim_3end_;
int cut_window_size_;
int cut_mean_quality_;
int trim_front1_;
int trim_tail1_;
int trim_front2_;
int trim_tail2_;


//param for adapter
bool trim_adapter_;
bool no_trim_adapter_;
bool se_auto_detect_adapter_;
bool pe_auto_detect_adapter_;
bool detect_adapter1_;
bool detect_adapter2_;
std::string adapter_seq1_;
std::string adapter_seq2_;
int overlap_diff_limit_;
int overlap_require_;
bool correct_data_;
bool analyze_overlap_;
int adapter_len_lim_;
bool print_what_trimmed_;
std::vector<std::string> adapter_from_fasta_;
std::string adapter_fasta_file_;

//duplicate
bool state_duplicate_;

//polyx
bool trim_polyx_;
bool trim_polyg_;
int trim_poly_len_;

//umi
bool add_umi_;
int umi_loc_;
int umi_len_;
std::string umi_prefix_;
int umi_skip_;

//TGS
bool is_TGS_;
int TGS_min_len_;

//overrepresentation_analysis
bool do_overrepresentation_;
int overrepresentation_sampling_;
std::vector<std::string> hot_seqs_;
std::vector<std::string> hot_seqs2_;
int eva_len_;
int eva_len2_;
bool print_ORP_seqs_;

//insert size
bool no_insert_size_;
int max_insert_size_;

//gz
int compression_level_;

//interleaved
bool interleaved_in_;
bool interleaved_out_;


//parallel gz
bool use_pugz_;
bool use_pigz_;
int pugz_threads_;
int pigz_threads_;
};


#endif//RERABBITQC_CMDINFO_H
49 changes: 49 additions & 0 deletions slave/dma_funcs.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#pragma once
#include <sys/cdefs.h>
#define __sw_256bit_simd__
#ifdef __sw_512bit_simd__
typedef int desc_t __attribute__ ((__mode__(__V1XI__)));
#elif defined(__sw_256bit_simd__)
typedef int desc_t __attribute__ ((__mode__(__V1OI__)));
#endif
template <typename TM, typename TL>
__always_inline void dma_getn(TM *mem, TL *ldm, int count) {
//long dma_desc[4];
desc_t dma_desc;
asm volatile(
"beq %[SIZE], 2f\n\t"
"memb\n\t"
"stl $31, 0(%[RPL])\n\t"
"vinsw %[RPL], $31, 2, %[DESC]\n\t"
"ldi %[DESC], 1($31)\n\t"
"sll %[DESC], 56, %[DESC]\n\t"
"vinsw %[SIZE], %[DESC], 0, %[DESC]\n\t"
"dma %[DESC], %[MEM], %[LDM]\n\t"
"1:\n\t"
"ldw %[DESC], 0(%[RPL])\n\t"
"beq %[DESC], 1b\n\t"
"2:\n\t"
"memb\n\t"
: [ DESC ] "=&r"(dma_desc)
: [ MEM ] "r"(mem), [ LDM ] "r"(ldm), [ RPL ] "r"(((long*)&dma_desc) + 2), [ SIZE ] "r"(count * sizeof(TL))
: "memory");
}
template <typename TM, typename TL>
__always_inline __attribute__((__artificial__)) void dma_putn(TM *mem, TL *ldm, int count) {
desc_t dma_desc;
asm volatile(
"beq %[SIZE], 2f\n\t"
"memb\n\t"
"stl $31, 0(%[RPL])\n\t"
"vinsw %[RPL], $31, 2, %[DESC]\n\t"
"vinsw %[SIZE], %[DESC], 0, %[DESC]\n\t"
"dma %[DESC], %[MEM], %[LDM]\n\t"
"1:\n\t"
"ldw %[DESC], 0(%[RPL])\n\t"
"beq %[DESC], 1b\n\t"
"2:\n\t"
"memb\n\t"
: [ DESC ] "=&r"(dma_desc)
: [ MEM ] "r"(mem), [ LDM ] "r"(ldm), [ RPL ] "r"(((long*)&dma_desc) + 2), [ SIZE ] "r"(count * sizeof(TL))
: "memory");
}
112 changes: 112 additions & 0 deletions slave/slave.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@

#include <cstring>
#include <vector>
#include <unordered_map>
#include <unistd.h>

#include "tuple_spawn.hpp"
#include "slave.h"
#include "Reference.h"
#include "cmdinfo.h"
#include "state.h"
#include "tgsstate.h"
#include "threadinfo.h"


struct qc_data {
ThreadInfo **thread_info_;
CmdInfo *cmd_info_;
std::vector <neoReference> *data_;
std::vector <neoReference> *pass_data_;
int *cnt;
};

void updateTop5(int rlen, double avgQual, TGSStats *tgs_state) {
int pos = 5;
for (int i = 0; i < 5; i++) {
if (tgs_state->mTop5QualReads[i].first < avgQual) {
for (int j = 4; j > i; j--) {
tgs_state->mTop5QualReads[j] = tgs_state->mTop5QualReads[j - 1];
}
tgs_state->mTop5QualReads[i] = {avgQual, rlen};
break;
}
}
for (int i = 0; i < 5; i++) {
if (tgs_state->mTop5LengReads[i].first < rlen) {
for (int j = 4; j > i; j--) {
tgs_state->mTop5LengReads[j] = tgs_state->mTop5LengReads[j - 1];
}
tgs_state->mTop5LengReads[i] = {rlen, avgQual};
break;
}
}
}

void tgsfunc(qc_data *para) {
//if(_PEN != 0) return;
std::vector <neoReference> *data = para->data_;
ThreadInfo *thread_info = para->thread_info_[_PEN];
CmdInfo *cmd_info = para->cmd_info_;
int data_num = data->size();
int base_num = 0;
for (int id = _PEN; id < data_num; id += 64) {
//for (int id = 0; id < data_num; id++) {
para->cnt[_PEN]++;
auto ref = (*data)[id];
//base_num += ref.lqual;
const int rlen = ref.lseq;
const char *seq = reinterpret_cast<const char *>(ref.base + ref.pseq);
const char *quality = reinterpret_cast<const char *>(ref.base + ref.pqual);
int size_range = thread_info->TGS_state_->mMinlen >> 1;
if(rlen < size_range) size_range = rlen;
if(thread_info->TGS_state_->countLenNum < (1 << 20))
thread_info->TGS_state_->mTotalReadsLen[thread_info->TGS_state_->countLenNum++] = rlen;
thread_info->TGS_state_->mReadsNum++;
thread_info->TGS_state_->mBasesNum += rlen;
char c1, c2;
int phredSub = 33;
if (cmd_info->isPhred64_) phredSub = 64;
int sumQual = 0;
for (int i = 0; i < rlen; i++) {
int qual = (quality[i] - phredSub);
sumQual += qual;
if (qual >= 5) thread_info->TGS_state_->mBases51015Num[0]++;
if (qual >= 10) thread_info->TGS_state_->mBases51015Num[1]++;
if (qual >= 15) thread_info->TGS_state_->mBases51015Num[2]++;
}
updateTop5(rlen, 1.0 * sumQual / rlen, thread_info->TGS_state_);
for (int i = 0; i < size_range; ++i) {
c1 = seq[i];
thread_info->TGS_state_->head_qual_sum[i] += (quality[i] - phredSub);
if (c1 == 'A') {
thread_info->TGS_state_->head_seq_pos_count[0][i]++;
} else if (c1 == 'T') {
thread_info->TGS_state_->head_seq_pos_count[1][i]++;
} else if (c1 == 'C') {
thread_info->TGS_state_->head_seq_pos_count[2][i]++;
} else if (c1 == 'G') {
thread_info->TGS_state_->head_seq_pos_count[3][i]++;
}
}
for (int i = rlen - 1; i >= rlen - size_range; --i) {
c2 = seq[i];
thread_info->TGS_state_->tail_qual_sum[i - (rlen - size_range)] += (quality[i] - phredSub);

if (c2 == 'A') {
thread_info->TGS_state_->tail_seq_pos_count[0][i - (rlen - size_range)]++;
} else if (c2 == 'T') {
thread_info->TGS_state_->tail_seq_pos_count[1][i - (rlen - size_range)]++;
} else if (c2 == 'C') {
thread_info->TGS_state_->tail_seq_pos_count[2][i - (rlen - size_range)]++;
} else if (c2 == 'G') {
thread_info->TGS_state_->tail_seq_pos_count[3][i - (rlen - size_range)]++;
}
}

}
//printf("=== %d base number %d\n", _PEN, base_num);
}

// template void tmpl_entrance(decltype(test__)*);
ATHREAD_VISIBLE(tgsfunc);
Loading

0 comments on commit 7fbc877

Please sign in to comment.