From 7fbc8775086274c27aee3292274db330139ab68a Mon Sep 17 00:00:00 2001 From: yanlifeng <1362520198@qq.com> Date: Fri, 16 Dec 2022 09:32:02 +0800 Subject: [PATCH] tgs sample slave, somtimes RE ? --- Makefile | 38 ++++-- run.sh | 1 + slave/Reference.h | 39 ++++++ slave/cmdinfo.h | 112 +++++++++++++++++ slave/dma_funcs.hpp | 49 ++++++++ slave/slave.cpp | 112 +++++++++++++++++ slave/state.h | 161 +++++++++++++++++++++++++ slave/tgsstate.h | 97 +++++++++++++++ slave/threadinfo.h | 27 +++++ slave/tuple_spawn.hpp | 71 +++++++++++ src/Globals.h | 1 + src/adapter.cpp | 11 +- src/main.cpp | 165 ++++++++++++++----------- src/seqc.cpp | 256 +++++++++++++++++++++------------------ src/seqc.h | 2 +- src/state.cpp | 4 +- src/tgsstate.cpp | 99 ++++----------- src/tgsstate.h | 15 ++- src/tuple_spawn.hpp | 71 +++++++++++ xlink.py | 272 ++++++++++++++++++++++++++++++++++++++++++ 20 files changed, 1308 insertions(+), 295 deletions(-) create mode 100644 run.sh create mode 100644 slave/Reference.h create mode 100644 slave/cmdinfo.h create mode 100644 slave/dma_funcs.hpp create mode 100644 slave/slave.cpp create mode 100644 slave/state.h create mode 100644 slave/tgsstate.h create mode 100644 slave/threadinfo.h create mode 100644 slave/tuple_spawn.hpp create mode 100644 src/tuple_spawn.hpp create mode 100644 xlink.py diff --git a/Makefile b/Makefile index 1c5a657..04c7fa4 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,14 @@ PRINTDEBUG := 0 +InstructSet := $(info ) DIR_INC := ./inc DIR_SRC := ./src +SLAVE_DIR_SRC := ./slave DIR_OBJ := ./obj + PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin INCLUDE_DIRS ?= @@ -19,6 +22,10 @@ SRC2 := $(wildcard ${DIR_SRC}/*.c) OBJ += $(patsubst %.c,${DIR_OBJ}/%.o,$(notdir ${SRC2})) +SRC3 := $(wildcard ${SLAVE_DIR_SRC}/*.cpp) +OBJ += $(patsubst %.cpp,${DIR_OBJ}/%.o,$(notdir ${SRC3})) + + TARGET := RabbitQCPlus BIN_TARGET := ${TARGET} @@ -28,29 +35,42 @@ CXX = sw5g++ CXXFLAGS := $(InstructSet) -CXXFLAGS += -DNDUBUG -std=c++11 -I./ -I./common -g -O3 -w -fopenmp +CXXFLAGS += -DVerbose -std=c++11 -I./ -I./common -g -O3 -w -fopenmp CXX2 = sw5gcc CXXFLAGS2 := -g -O3 -w -Wextra -Wno-unknown-pragmas -Wcast-qual -LIBS := -static -lz -Wl,-z,muldefs -lpthread -fopenmp -lrt +LIBS := -static -lz -Wl,-z,muldefs -lpthread -fopenmp -lrt LD_FLAGS := $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(LIBS) - - - ${BIN_TARGET}:${OBJ} - $(CXX) $(OBJ) -o $@ $(LD_FLAGS) + python3 xlink.py $(CXX) -mhybrid $^ -o $@ $(LD_FLAGS) ${DIR_OBJ}/%.o:${DIR_SRC}/%.cpp - $(CXX) -c $< -o $@ $(CXXFLAGS) + $(CXX) -mhost -msimd -c $< -o $@ $(CXXFLAGS) +${DIR_OBJ}/%.o:${SLAVE_DIR_SRC}/%.cpp + $(CXX) -mslave -msimd -c $< -o $@ $(CXXFLAGS) +${DIR_OBJ}/%.o:${DIR_SRC}/%.c + $(CXX2) -mhost -msimd -c $< -o $@ $(CXXFLAGS2) -${DIR_OBJ}/%.o:${DIR_SRC}/%.c - $(CXX2) $(CXXFLAGS2) -c $< -o $@ + +#${BIN_TARGET}:${OBJ} +# python3 xlink.py $(CXX) -mhybrid $^ -o $@ $(LD_FLAGS) +# +#${DIR_OBJ}/%.o:${DIR_SRC}/%.cpp +# $(CXX) -mhost -msimd -c $< -o $@ $(CXXFLAGS) +# +#${DIR_OBJ}/%.o:${SLAVE_DIR_SRC}/%.cpp +# $(CXX) -mslave -msimd -c $< -o $@ $(CXXFLAGS) +# +#${DIR_OBJ}/%.o:${DIR_SRC}/%.c +# $(CXX2) -mhost -msimd -c $< -o $@ $(CXXFLAGS2) +# + .PHONY:clean clean: diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..9bafcf4 --- /dev/null +++ b/run.sh @@ -0,0 +1 @@ +bsub -b -I -q q_sw_expr -n 1 -cgsp 64 -sw3run swrun-5a -share_size 4096 ./RabbitQCPlus --help diff --git a/slave/Reference.h b/slave/Reference.h new file mode 100644 index 0000000..4bc7b9d --- /dev/null +++ b/slave/Reference.h @@ -0,0 +1,39 @@ +#ifndef __REFERENCE_H_ +#define __REFERENCE_H_ + +/** + * @brief Reference struct that store the FASTA and FASTQ infomation + */ +struct Reference { + std::string name; + std::string comment; + std::string seq; + std::string quality; + std::string strand; + uint64_t length; + uint64_t gid; +}; + +/** + * @brief Reference struct that store the FASTA and FASTQ infomation, different from Reference + * neoReference only record the start position and length of name, sequence, strand and quality + * base on certain chunk data `base` + */ +struct neoReference { + uint64_t pname; /// name offset form base + uint64_t pcom; /// comment offset form base + uint64_t pseq; /// sequence offset form base + uint64_t pqual; /// quality offset form base + uint64_t pstrand;///strand offset form base + + uint64_t lname; /// length of name + uint64_t lcom; /// length of comment + uint64_t lseq; /// length of sequence + uint64_t lqual; /// length of quality + uint64_t lstrand; /// length of strand + uint64_t gid; /// global id + unsigned char *base;/// base data pointer +}; + + +#endif diff --git a/slave/cmdinfo.h b/slave/cmdinfo.h new file mode 100644 index 0000000..eed2542 --- /dev/null +++ b/slave/cmdinfo.h @@ -0,0 +1,112 @@ +// +// Created by ylf9811 on 2021/7/6. +// + +#ifndef RERABBITQC_CMDINFO_H +#define RERABBITQC_CMDINFO_H + +class CmdInfo { +public: + CmdInfo(); + +public: + std::string in_file_name1_; + std::string in_file_name2_; + std::string out_file_name1_; + std::string out_file_name2_; + int thread_number_; + std::string command_; + bool isPhred64_; + bool isStdin_; + bool isStdout_; + + bool overWrite_; + bool write_data_; + int64_t out_block_size_; + int64_t in_file_size1_; + int64_t in_file_size2_; + int seq_len_; + int qul_range_; + + //param for filter + int length_required_; + int length_limit_; + int n_number_limit_; + int low_qual_perc_limit_; + bool trim_5end_; + bool trim_3end_; + int cut_window_size_; + int cut_mean_quality_; + int trim_front1_; + int trim_tail1_; + int trim_front2_; + int trim_tail2_; + + + //param for adapter + bool trim_adapter_; + bool no_trim_adapter_; + bool se_auto_detect_adapter_; + bool pe_auto_detect_adapter_; + bool detect_adapter1_; + bool detect_adapter2_; + std::string adapter_seq1_; + std::string adapter_seq2_; + int overlap_diff_limit_; + int overlap_require_; + bool correct_data_; + bool analyze_overlap_; + int adapter_len_lim_; + bool print_what_trimmed_; + std::vector adapter_from_fasta_; + std::string adapter_fasta_file_; + + //duplicate + bool state_duplicate_; + + //polyx + bool trim_polyx_; + bool trim_polyg_; + int trim_poly_len_; + + //umi + bool add_umi_; + int umi_loc_; + int umi_len_; + std::string umi_prefix_; + int umi_skip_; + + //TGS + bool is_TGS_; + int TGS_min_len_; + + //overrepresentation_analysis + bool do_overrepresentation_; + int overrepresentation_sampling_; + std::vector hot_seqs_; + std::vector hot_seqs2_; + int eva_len_; + int eva_len2_; + bool print_ORP_seqs_; + + //insert size + bool no_insert_size_; + int max_insert_size_; + + //gz + int compression_level_; + + //interleaved + bool interleaved_in_; + bool interleaved_out_; + + + //parallel gz + bool use_pugz_; + bool use_pigz_; + int pugz_threads_; + int pigz_threads_; +}; + + +#endif//RERABBITQC_CMDINFO_H diff --git a/slave/dma_funcs.hpp b/slave/dma_funcs.hpp new file mode 100644 index 0000000..4d993bf --- /dev/null +++ b/slave/dma_funcs.hpp @@ -0,0 +1,49 @@ +#pragma once +#include +#define __sw_256bit_simd__ +#ifdef __sw_512bit_simd__ +typedef int desc_t __attribute__ ((__mode__(__V1XI__))); +#elif defined(__sw_256bit_simd__) +typedef int desc_t __attribute__ ((__mode__(__V1OI__))); +#endif +template +__always_inline void dma_getn(TM *mem, TL *ldm, int count) { + //long dma_desc[4]; + desc_t dma_desc; + asm volatile( + "beq %[SIZE], 2f\n\t" + "memb\n\t" + "stl $31, 0(%[RPL])\n\t" + "vinsw %[RPL], $31, 2, %[DESC]\n\t" + "ldi %[DESC], 1($31)\n\t" + "sll %[DESC], 56, %[DESC]\n\t" + "vinsw %[SIZE], %[DESC], 0, %[DESC]\n\t" + "dma %[DESC], %[MEM], %[LDM]\n\t" + "1:\n\t" + "ldw %[DESC], 0(%[RPL])\n\t" + "beq %[DESC], 1b\n\t" + "2:\n\t" + "memb\n\t" + : [ DESC ] "=&r"(dma_desc) + : [ MEM ] "r"(mem), [ LDM ] "r"(ldm), [ RPL ] "r"(((long*)&dma_desc) + 2), [ SIZE ] "r"(count * sizeof(TL)) + : "memory"); +} +template +__always_inline __attribute__((__artificial__)) void dma_putn(TM *mem, TL *ldm, int count) { + desc_t dma_desc; + asm volatile( + "beq %[SIZE], 2f\n\t" + "memb\n\t" + "stl $31, 0(%[RPL])\n\t" + "vinsw %[RPL], $31, 2, %[DESC]\n\t" + "vinsw %[SIZE], %[DESC], 0, %[DESC]\n\t" + "dma %[DESC], %[MEM], %[LDM]\n\t" + "1:\n\t" + "ldw %[DESC], 0(%[RPL])\n\t" + "beq %[DESC], 1b\n\t" + "2:\n\t" + "memb\n\t" + : [ DESC ] "=&r"(dma_desc) + : [ MEM ] "r"(mem), [ LDM ] "r"(ldm), [ RPL ] "r"(((long*)&dma_desc) + 2), [ SIZE ] "r"(count * sizeof(TL)) + : "memory"); +} diff --git a/slave/slave.cpp b/slave/slave.cpp new file mode 100644 index 0000000..3149212 --- /dev/null +++ b/slave/slave.cpp @@ -0,0 +1,112 @@ + +#include +#include +#include +#include + +#include "tuple_spawn.hpp" +#include "slave.h" +#include "Reference.h" +#include "cmdinfo.h" +#include "state.h" +#include "tgsstate.h" +#include "threadinfo.h" + + +struct qc_data { + ThreadInfo **thread_info_; + CmdInfo *cmd_info_; + std::vector *data_; + std::vector *pass_data_; + int *cnt; +}; + +void updateTop5(int rlen, double avgQual, TGSStats *tgs_state) { + int pos = 5; + for (int i = 0; i < 5; i++) { + if (tgs_state->mTop5QualReads[i].first < avgQual) { + for (int j = 4; j > i; j--) { + tgs_state->mTop5QualReads[j] = tgs_state->mTop5QualReads[j - 1]; + } + tgs_state->mTop5QualReads[i] = {avgQual, rlen}; + break; + } + } + for (int i = 0; i < 5; i++) { + if (tgs_state->mTop5LengReads[i].first < rlen) { + for (int j = 4; j > i; j--) { + tgs_state->mTop5LengReads[j] = tgs_state->mTop5LengReads[j - 1]; + } + tgs_state->mTop5LengReads[i] = {rlen, avgQual}; + break; + } + } +} + +void tgsfunc(qc_data *para) { + //if(_PEN != 0) return; + std::vector *data = para->data_; + ThreadInfo *thread_info = para->thread_info_[_PEN]; + CmdInfo *cmd_info = para->cmd_info_; + int data_num = data->size(); + int base_num = 0; + for (int id = _PEN; id < data_num; id += 64) { + //for (int id = 0; id < data_num; id++) { + para->cnt[_PEN]++; + auto ref = (*data)[id]; + //base_num += ref.lqual; + const int rlen = ref.lseq; + const char *seq = reinterpret_cast(ref.base + ref.pseq); + const char *quality = reinterpret_cast(ref.base + ref.pqual); + int size_range = thread_info->TGS_state_->mMinlen >> 1; + if(rlen < size_range) size_range = rlen; + if(thread_info->TGS_state_->countLenNum < (1 << 20)) + thread_info->TGS_state_->mTotalReadsLen[thread_info->TGS_state_->countLenNum++] = rlen; + thread_info->TGS_state_->mReadsNum++; + thread_info->TGS_state_->mBasesNum += rlen; + char c1, c2; + int phredSub = 33; + if (cmd_info->isPhred64_) phredSub = 64; + int sumQual = 0; + for (int i = 0; i < rlen; i++) { + int qual = (quality[i] - phredSub); + sumQual += qual; + if (qual >= 5) thread_info->TGS_state_->mBases51015Num[0]++; + if (qual >= 10) thread_info->TGS_state_->mBases51015Num[1]++; + if (qual >= 15) thread_info->TGS_state_->mBases51015Num[2]++; + } + updateTop5(rlen, 1.0 * sumQual / rlen, thread_info->TGS_state_); + for (int i = 0; i < size_range; ++i) { + c1 = seq[i]; + thread_info->TGS_state_->head_qual_sum[i] += (quality[i] - phredSub); + if (c1 == 'A') { + thread_info->TGS_state_->head_seq_pos_count[0][i]++; + } else if (c1 == 'T') { + thread_info->TGS_state_->head_seq_pos_count[1][i]++; + } else if (c1 == 'C') { + thread_info->TGS_state_->head_seq_pos_count[2][i]++; + } else if (c1 == 'G') { + thread_info->TGS_state_->head_seq_pos_count[3][i]++; + } + } + for (int i = rlen - 1; i >= rlen - size_range; --i) { + c2 = seq[i]; + thread_info->TGS_state_->tail_qual_sum[i - (rlen - size_range)] += (quality[i] - phredSub); + + if (c2 == 'A') { + thread_info->TGS_state_->tail_seq_pos_count[0][i - (rlen - size_range)]++; + } else if (c2 == 'T') { + thread_info->TGS_state_->tail_seq_pos_count[1][i - (rlen - size_range)]++; + } else if (c2 == 'C') { + thread_info->TGS_state_->tail_seq_pos_count[2][i - (rlen - size_range)]++; + } else if (c2 == 'G') { + thread_info->TGS_state_->tail_seq_pos_count[3][i - (rlen - size_range)]++; + } + } + + } + //printf("=== %d base number %d\n", _PEN, base_num); +} + +// template void tmpl_entrance(decltype(test__)*); +ATHREAD_VISIBLE(tgsfunc); diff --git a/slave/state.h b/slave/state.h new file mode 100644 index 0000000..5f34e1a --- /dev/null +++ b/slave/state.h @@ -0,0 +1,161 @@ +// +// Created by ylf9811 on 2021/7/8. +// + +#ifndef RERABBITQC_STATE_H +#define RERABBITQC_STATE_H + + +struct node { + int pre, cnt; + uint64_t v; + int slen; + std::string seq; + int64_t *dist; +}; + +class State { +public: + State(CmdInfo *cmd_info, int seq_len, int qul_range, bool is_reed2); + + ~State(); + + void StateInfo(neoReference &ref); + + void StateORP(neoReference &ref); + + void ExtendBuffer(int old_len, int new_len); + + void Summarize(); + + static State *MergeStates(const std::vector &states); + + static void PrintStates(const State *state); + + static void PrintFilterResults(const State *state); + + static void PrintAdapterToFile(const State *state); + + static std::string list2string(int64_t *list, int size); + + static std::string list2string(double *list, int size); + + void HashInsert(const char *seq, int len, int eva_len); + + void HashState(); + + //void HashQueryAndAdd(uint64_t now, int offset, int len, int eva_len); + + bool HashQueryAndAdd(uint64_t now, int offset, int len, int eva_len); + + int *GetHeadHashGraph() const; + + node *GetHashGraph() const; + + int GetHashNum() const; + + int64_t GetQ20Bases() const; + + int64_t GetQ30Bases() const; + + int64_t GetLines() const; + + int GetMallocSeqLen() const; + + int GetQulRange() const; + + int GetRealSeqLen() const; + + int GetKmerBufLen() const; + + int64_t *GetPosQul() const; + + int64_t *GetPosCnt() const; + + int64_t *GetLenCnt() const; + + int64_t *GetGcCnt() const; + + int64_t *GetQulCnt() const; + + int64_t *GetKmer() const; + + int64_t GetKmerMin() const; + + int64_t GetKmerMax() const; + + bool IsHasSummarize() const; + + int64_t GetTotBases() const; + + int64_t GetGcBases() const; + + double GetOrpCost() const; + + double GetAvgLen() const; + + void AddPassReads(); + + void AddFailShort(); + + void AddFailLong(); + + void AddFailN(); + + void AddFailLowq(); + + void AddTrimAdapter(); + + void AddTrimAdapterBase(int cnt); + + + std::unordered_map GetAdapterMap(); + + CmdInfo *GetCmdInfo() const; + +private: + CmdInfo *cmd_info_; + + int64_t q20bases_; + int64_t q30bases_; + int64_t lines_; + int malloc_seq_len_; + int qul_range_; + int real_seq_len_; + int kmer_buf_len_; + int64_t *pos_qul_; + int64_t *pos_cnt_; + int64_t *len_cnt_; + int64_t *gc_cnt_; + int64_t *qul_cnt_; + int64_t *kmer_; + int64_t kmer_min_; + int64_t kmer_max_; + int64_t tot_bases_; + int64_t gc_bases_; + double avg_len; + bool has_summarize_; + int *head_hash_graph_; + node *hash_graph_; + int hash_num_; + bool is_read2_; + bool do_over_represent_analyze_; + int over_representation_sampling_; + int64_t over_representation_qcnt_; + int64_t over_representation_pcnt_; + int64_t *bf_zone_; + double orpCost; + +public: + int64_t pass_reads_; + int64_t fail_short_; + int64_t fail_long_; + int64_t fail_N_; + int64_t fail_lowq_; + int64_t trim_adapter_; + int64_t trim_adapter_bases_; + std::unordered_map adapter_map_; +}; + + +#endif//RERABBITQC_STATE_H diff --git a/slave/tgsstate.h b/slave/tgsstate.h new file mode 100644 index 0000000..d94b5b5 --- /dev/null +++ b/slave/tgsstate.h @@ -0,0 +1,97 @@ +// +// Created by ylf9811 on 2021/8/11. +// + +#ifndef RERABBITQC_TGSSTATE_H +#define RERABBITQC_TGSSTATE_H + + +class TGSStats { +public: + TGSStats(int minLen, bool isMerge = false); + + ~TGSStats(); + + static TGSStats *merge(std::vector &list); + + void print(); + + void CalReadsLens(); + + bool isLongRead(); + + static std::string list2string(double *list, int size); + + static std::string list2string(double *list, int size, int64_t *coords); + + static std::string list2string(int64_t *list, int size); + + static std::string list2stringReversedOrder(int64_t *list, int size); + + int base2num(std::string base); + +//private: +public: + int mMinlen; + + int mHalfMinlen; + + //std::vector mTotalReadsLen; + int *mTotalReadsLen; + + int countLenNum; + + int *readsLens; + + int64_t *head_seq_pos_count[4]; + + int64_t *tail_seq_pos_count[4]; + + int64_t *head_qual_sum; + + int64_t *tail_qual_sum; + + int mMaxReadsLen; + + double mAvgReadsLen; + + int64_t mReadsNum; + + int64_t mBasesNum; + + int64_t *mBases51015Num; + + std::pair *mTop5QualReads; + + std::pair *mTop5LengReads; + + void updateTop5(int rlen, double avgQual); + +public: + int64_t *const *GetHeadSeqPosCount() const; + + int64_t *const *GetTailSeqPosCount() const; + + const int64_t *GetHeadQualSum() const; + + const int64_t *GetTailQualSum() const; + + const int GetMaxReadsLen() const; + + const double GetAvgReadsLen() const; + + const int64_t GetReadsNum() const; + + const int64_t GetBasesNum() const; + + const int64_t *GetBases51015Num() const; + + const std::pair *GetTop5QualReads() const; + + const std::pair *GetTop5LengReads() const; + + int *GetReadsLens() const; +}; + + +#endif//RERABBITQC_TGSSTATE_H diff --git a/slave/threadinfo.h b/slave/threadinfo.h new file mode 100644 index 0000000..245226a --- /dev/null +++ b/slave/threadinfo.h @@ -0,0 +1,27 @@ +// +// Created by ylf9811 on 2021/7/7. +// + +#ifndef RERABBITQC_THREADINFO_H +#define RERABBITQC_THREADINFO_H + + +class ThreadInfo { +public: + ThreadInfo(CmdInfo *cmd_info, bool is_pre); + + ~ThreadInfo(); + +public: + bool is_pe_; + CmdInfo *cmd_info_; + TGSStats *TGS_state_; + State *pre_state1_; + State *pre_state2_; + State *aft_state1_; + State *aft_state2_; + int64_t *insert_size_dist_; +}; + + +#endif//RERABBITQC_THREADINFO_H diff --git a/slave/tuple_spawn.hpp b/slave/tuple_spawn.hpp new file mode 100644 index 0000000..e348ac9 --- /dev/null +++ b/slave/tuple_spawn.hpp @@ -0,0 +1,71 @@ +#pragma once +#include +#ifdef __sw_slave__ +#include "dma_funcs.hpp" +namespace cxx11_apply +{ + template + struct integer_sequence + { + }; + template + struct GenSeq : GenSeq + { + }; + template + struct GenSeq<0, Is...> + { + typedef integer_sequence type; + }; + template + void __apply(Tt t, Tf f, integer_sequence seq) + { + f(std::get(t)...); + } + template + void apply(void (*F)(Ts...), std::tuple tuple) + { + typename GenSeq::type a; + __apply(tuple, F, a); + } +} + +template +__always_inline void call_tuple(void (*f)(Ts...), Ts ...arg){ + f(arg...); +} +template +__always_inline void __tuple_spawn_proxy(std::tuple *arg) { + cxx11_apply::apply(call_tuple, *arg); +} +template +__attribute__((noinline)) void tuple_spawn_proxy(std::tuple *arg) { + std::tuple ldm_arg; + dma_getn(arg, &ldm_arg, 1); + cxx11_apply::apply(call_tuple, ldm_arg); +} +template +void tmpl_entrance(void (*f)(Ts...)){ + __asm__ __volatile__ ("" + : + :"r"(tuple_spawn_proxy) + : "memory"); +} +#define ATHREAD_VISIBLE(x) template void tmpl_entrance(decltype(x)*); + +#endif +#ifdef __sw_host__ +extern "C"{ +#include +} +#include + +template +extern void slave_tuple_spawn_proxy(std::tuple*); +template +void athread_spawn_tupled(void (*f)(Ts...), Ts ...args) { + auto arg = std::tuple(f, args...); + __real_athread_spawn((void*)slave_tuple_spawn_proxy, &arg); +} + +#endif \ No newline at end of file diff --git a/src/Globals.h b/src/Globals.h index fd3efe8..dc6116c 100644 --- a/src/Globals.h +++ b/src/Globals.h @@ -51,6 +51,7 @@ Version: 2.00 #include #include +#define slave_num 64 inline double GetTime() { struct timeval tv; diff --git a/src/adapter.cpp b/src/adapter.cpp index 15b4696..adbcd8f 100644 --- a/src/adapter.cpp +++ b/src/adapter.cpp @@ -128,7 +128,7 @@ void Adapter::PreOverAnalyze(string file_name, vector &hot_seqs, int &ev #ifdef Verbose double t0 = GetTime(); #endif - auto *fastq_data_pool = new rabbit::fq::FastqDataPool(128, 1 << 22); + auto *fastq_data_pool = new rabbit::fq::FastqDataPool(4, 1 << 22); auto fqFileReader = new rabbit::fq::FastqFileReader(file_name, *fastq_data_pool, "", file_name.find(".gz") != string::npos); int64_t n_chunks = 0; @@ -306,12 +306,12 @@ vector Adapter::LoadAdaptersFromFasta(string file_name) { } int Adapter::EvalMaxLen(string file_name) { - auto *fastq_data_pool = new rabbit::fq::FastqDataPool(128, 1 << 22); + auto *fastq_data_pool = new rabbit::fq::FastqDataPool(4, 1 << 22); auto fqFileReader = new rabbit::fq::FastqFileReader(file_name, *fastq_data_pool, "", file_name.find(".gz") != string::npos); int64_t n_chunks = 0; // stat up to 256K reads - const long READ_LIMIT = 256 * 1024; + const long READ_LIMIT = 4 * 1024; const long BASE_LIMIT = 151 * READ_LIMIT; long records = 0; long bases = 0; @@ -342,12 +342,11 @@ int Adapter::EvalMaxLen(string file_name) { } string Adapter::AutoDetect(string file_name, int trim_tail) { - auto *fastq_data_pool = new rabbit::fq::FastqDataPool(128, 1 << 22); + auto *fastq_data_pool = new rabbit::fq::FastqDataPool(4, 1 << 22); auto fqFileReader = new rabbit::fq::FastqFileReader(file_name, *fastq_data_pool, "", file_name.find(".gz") != string::npos); int64_t n_chunks = 0; - // stat up to 256K reads - const long READ_LIMIT = 256 * 1024; + const long READ_LIMIT = 4 * 1024; const long BASE_LIMIT = 151 * READ_LIMIT; long records = 0; long bases = 0; diff --git a/src/main.cpp b/src/main.cpp index 588558f..0ec962c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,9 +6,10 @@ #include "th_ass.h" #include #include + using namespace std; -inline bool exists_file (const std::string& name) { +inline bool exists_file(const std::string &name) { ifstream f(name.c_str()); return f.good(); } @@ -23,34 +24,41 @@ int main(int argc, char **argv) { app.add_option("-O,--outFile2", cmd_info.out_file_name2_, "output fastq name 2"); - app.add_option("--compressLevel", cmd_info.compression_level_, "output file compression level (1 - 9), default is 4"); + app.add_option("--compressLevel", cmd_info.compression_level_, + "output file compression level (1 - 9), default is 4"); app.add_flag("--overWrite", cmd_info.overWrite_, "overwrite out file if already exists."); app.add_flag("--phred64", cmd_info.isPhred64_, "input is using phred64 scoring, default is phred33"); app.add_flag("--stdin", cmd_info.isStdin_, - "input from stdin, or -i /dev/stdin, only for se data or interleaved pe data(which means use --interleavedIn)"); + "input from stdin, or -i /dev/stdin, only for se data or interleaved pe data(which means use --interleavedIn)"); app.add_flag("--stdout", cmd_info.isStdout_, - "output to stdout, or -o /dev/stdout, only for se data or interleaved pe data(which means use --interleavedOut)"); + "output to stdout, or -o /dev/stdout, only for se data or interleaved pe data(which means use --interleavedOut)"); + int tmp_no_dup_ = 0; + app.add_flag("--noDuplicate", tmp_no_dup_, "don't do duplicate state, default is off"); app.add_flag("-a,--noTrimAdapter", cmd_info.no_trim_adapter_, "don't trim adapter, default is off"); app.add_flag("--decAdaForSe", cmd_info.se_auto_detect_adapter_, "detect adapter for se data, default is on"); app.add_flag("--decAdaForPe", cmd_info.pe_auto_detect_adapter_, - "detect adapter for pe data, default is off, tool prefers to use overlap to find adapter"); + "detect adapter for pe data, default is off, tool prefers to use overlap to find adapter"); app.add_flag("--printWhatTrimmed", cmd_info.print_what_trimmed_, - "if print what trimmed to *_trimmed_adapters.txt or not, default is not"); + "if print what trimmed to *_trimmed_adapters.txt or not, default is not"); app.add_option("--adapterSeq1", cmd_info.adapter_seq1_, "specify adapter sequence for read1"); app.add_option("--adapterSeq2", cmd_info.adapter_seq2_, "specify adapter sequence for read2"); app.add_option("--adapterFastaFile", cmd_info.adapter_fasta_file_, "specify adapter sequences use fasta file"); //app.add_option("--adapterLengthLimit", cmd_info.adapter_len_lim_, "minimum adapter length when trimming, default is 0"); - app.add_flag("-c,--correctData", cmd_info.correct_data_, "correcting low quality bases using information from overlap, default is off"); + app.add_flag("-c,--correctData", cmd_info.correct_data_, + "correcting low quality bases using information from overlap, default is off"); - app.add_option("-w,--threadNum", cmd_info.thread_number_, "number of thread used to do QC, including (de)compression for compressed data, default is 8"); + app.add_option("-w,--threadNum", cmd_info.thread_number_, + "number of thread used to do QC, including (de)compression for compressed data, default is 8"); //filter - app.add_flag("-5,--trim5End", cmd_info.trim_5end_, "do sliding window from 5end to 3end to trim low quality bases, default is off"); - app.add_flag("-3,--trim3End", cmd_info.trim_3end_, "do sliding window from 5end to 3end to trim low quality bases, default is off"); + app.add_flag("-5,--trim5End", cmd_info.trim_5end_, + "do sliding window from 5end to 3end to trim low quality bases, default is off"); + app.add_flag("-3,--trim3End", cmd_info.trim_3end_, + "do sliding window from 5end to 3end to trim low quality bases, default is off"); app.add_option("--trimFront1", cmd_info.trim_front1_, "number of bases to trim from front in read1, default is 0"); app.add_option("--trimFront2", cmd_info.trim_front2_, "number of bases to trim from front in read2, default is 0"); app.add_option("--trimTail1", cmd_info.trim_tail1_, "number of bases to trim from tail in read1, default is 0"); @@ -64,34 +72,38 @@ int main(int argc, char **argv) { app.add_flag("-u,--addUmi", cmd_info.add_umi_, "do unique molecular identifier (umi) processing, default is off"); app.add_option("--umiLen", cmd_info.umi_len_, "umi length if it is in read1/read2, default is 0"); string umiLoc = ""; - app.add_option("--umiLoc", umiLoc, "specify the location of umi, can be (index1/index2/read1/read2/per_index/per_read), default is 0"); - app.add_option("--umiPrefix", cmd_info.umi_prefix_, "identification to be added in front of umi, default is no prefix"); + app.add_option("--umiLoc", umiLoc, + "specify the location of umi, can be (index1/index2/read1/read2/per_index/per_read), default is 0"); + app.add_option("--umiPrefix", cmd_info.umi_prefix_, + "identification to be added in front of umi, default is no prefix"); app.add_option("--umiSkip", cmd_info.umi_skip_, "the number bases to skip if umi exists, default is 0"); //app.add_option("--seqLen", cmd_info.seq_len_, "max sequence length, default is 200"); //TGS - app.add_flag("--TGS", cmd_info.is_TGS_, "process third generation sequencing (TGS) data (only for se data, does not support trimming and will not produce output files), default is off"); + app.add_flag("--TGS", cmd_info.is_TGS_, + "process third generation sequencing (TGS) data (only for se data, does not support trimming and will not produce output files), default is off"); //overrepresentation app.add_flag("-p,--doOverrepresentation", cmd_info.do_overrepresentation_, - "do over-representation sequence analysis, default is off"); + "do over-representation sequence analysis, default is off"); app.add_option("-P,--overrepresentationSampling", cmd_info.overrepresentation_sampling_, - "do overrepresentation every [] reads, default is 20"); + "do overrepresentation every [] reads, default is 20"); app.add_flag("--printORPSeqs", cmd_info.print_ORP_seqs_, - "if print overrepresentation sequences to *ORP_sequences.txt or not, default is not"); + "if print overrepresentation sequences to *ORP_sequences.txt or not, default is not"); //insert size analyze - app.add_flag("--noInsertSize", cmd_info.no_insert_size_, "no insert size analysis (only for pe data), default is to do insert size analysis"); + app.add_flag("--noInsertSize", cmd_info.no_insert_size_, + "no insert size analysis (only for pe data), default is to do insert size analysis"); //interleaved app.add_flag("--interleavedIn", cmd_info.interleaved_in_, - "use interleaved input (only for pe data), default is off"); + "use interleaved input (only for pe data), default is off"); app.add_flag("--interleavedOut", cmd_info.interleaved_out_, - "use interleaved output (only for pe data), default is off"); + "use interleaved output (only for pe data), default is off"); //parallel gz @@ -114,7 +126,7 @@ int main(int argc, char **argv) { CLI11_PARSE(app, argc, argv); - if(cmd_info.compression_level_ < 1 || cmd_info.compression_level_ > 9){ + if (cmd_info.compression_level_ < 1 || cmd_info.compression_level_ > 9) { printf("error : compression level should in [1, 9]!\n"); return 0; } @@ -124,24 +136,28 @@ int main(int argc, char **argv) { return 0; } - if(cmd_info.is_TGS_){ - if(cmd_info.in_file_name2_.length() > 0){ + if (tmp_no_dup_)cmd_info.state_duplicate_ = false; + + if (cmd_info.is_TGS_) { + if (cmd_info.in_file_name2_.length() > 0) { printf("WARNING : the TGS module does not support pe inputs, so ignore the -I parameter.\n"); cmd_info.in_file_name2_ = ""; } - if(cmd_info.out_file_name1_.length() != 0 || cmd_info.out_file_name2_.length() != 0){ + if (cmd_info.out_file_name1_.length() != 0 || cmd_info.out_file_name2_.length() != 0) { printf("WARNING : the TGS module does not support trimming and will not produce output files, so ignore the -o and -O parameter.\n"); cmd_info.out_file_name1_ = ""; cmd_info.out_file_name2_ = ""; } } - if(cmd_info.in_file_name2_.length() > 0 && cmd_info.out_file_name2_.length() == 0 && cmd_info.interleaved_out_ == 0){ + if (cmd_info.in_file_name2_.length() > 0 && cmd_info.out_file_name2_.length() == 0 && + cmd_info.interleaved_out_ == 0) { error_exit("when interleaved_out module is off, two input files can not correspond to one output.\n"); } - if(cmd_info.out_file_name2_.length() > 0 && cmd_info.in_file_name2_.length() == 0 && cmd_info.interleaved_in_ == 0){ + if (cmd_info.out_file_name2_.length() > 0 && cmd_info.in_file_name2_.length() == 0 && + cmd_info.interleaved_in_ == 0) { error_exit("when interleaved_in module is off, one input file can not correspond to two outputs.\n"); } @@ -151,16 +167,16 @@ int main(int argc, char **argv) { //check if files is ok int files_ok = 1; if (is_pe) { - if (!cmd_info.interleaved_in_){ - if(ends_with(cmd_info.in_file_name1_, ".gz") != ends_with(cmd_info.in_file_name2_, ".gz")) + if (!cmd_info.interleaved_in_) { + if (ends_with(cmd_info.in_file_name1_, ".gz") != ends_with(cmd_info.in_file_name2_, ".gz")) files_ok = 0; } - if (!cmd_info.interleaved_out_){ - if(ends_with(cmd_info.out_file_name1_, ".gz") != ends_with(cmd_info.out_file_name2_, ".gz")) + if (!cmd_info.interleaved_out_) { + if (ends_with(cmd_info.out_file_name1_, ".gz") != ends_with(cmd_info.out_file_name2_, ".gz")) files_ok = 0; } } - if(files_ok == 0){ + if (files_ok == 0) { printf("error : for PE data, both files must be of the same type, i.e. cannot be one compressed and one uncompressed!"); return 0; } @@ -173,22 +189,22 @@ int main(int argc, char **argv) { if (in_gz) in_module |= 1; if (out_gz) in_module |= 2; - if(cmd_info.in_file_name2_.length() == 0)cmd_info.thread_number_ = min(32, cmd_info.thread_number_); + if (cmd_info.in_file_name2_.length() == 0)cmd_info.thread_number_ = min(32, cmd_info.thread_number_); else cmd_info.thread_number_ = min(cmd_info.thread_number_, 64); int t1, t2, t3; if (is_pe) { //PE int tot_threads = cmd_info.thread_number_; - if (in_module == 3){ + if (in_module == 3) { t1 = thread_assignment_pe3[tot_threads].pugz_t; t2 = thread_assignment_pe3[tot_threads].w_t; t3 = thread_assignment_pe3[tot_threads].pigz_t; - } else if (in_module == 2){ + } else if (in_module == 2) { t1 = thread_assignment_pe2[tot_threads].pugz_t; t2 = thread_assignment_pe2[tot_threads].w_t; t3 = thread_assignment_pe2[tot_threads].pigz_t; - } else if (in_module == 1){ + } else if (in_module == 1) { t1 = thread_assignment_pe1[tot_threads].pugz_t; t2 = thread_assignment_pe1[tot_threads].w_t; t3 = thread_assignment_pe1[tot_threads].pigz_t; @@ -197,18 +213,18 @@ int main(int argc, char **argv) { t2 = thread_assignment_pe0[tot_threads].w_t; t3 = thread_assignment_pe0[tot_threads].pigz_t; } - }else { + } else { //SE int tot_threads = cmd_info.thread_number_; - if (in_module == 3){ + if (in_module == 3) { t1 = thread_assignment_se3[tot_threads].pugz_t; t2 = thread_assignment_se3[tot_threads].w_t; t3 = thread_assignment_se3[tot_threads].pigz_t; - } else if (in_module == 2){ + } else if (in_module == 2) { t1 = thread_assignment_se2[tot_threads].pugz_t; t2 = thread_assignment_se2[tot_threads].w_t; t3 = thread_assignment_se2[tot_threads].pigz_t; - } else if (in_module == 1){ + } else if (in_module == 1) { t1 = thread_assignment_se1[tot_threads].pugz_t; t2 = thread_assignment_se1[tot_threads].w_t; t3 = thread_assignment_se1[tot_threads].pigz_t; @@ -220,20 +236,20 @@ int main(int argc, char **argv) { } cmd_info.thread_number_ = t2; - if (t1 == 0){ + if (t1 == 0) { cmd_info.use_pugz_ = 0; } else { cmd_info.use_pugz_ = 1; cmd_info.pugz_threads_ = t1; } - if (t3 <= 1){ + if (t3 <= 1) { cmd_info.use_pigz_ = 0; } else { cmd_info.use_pigz_ = 1; cmd_info.pigz_threads_ = t3; } - if(cmd_info.interleaved_in_ || cmd_info.interleaved_out_){ + if (cmd_info.interleaved_in_ || cmd_info.interleaved_out_) { cmd_info.use_pugz_ = 0; cmd_info.use_pigz_ = 0; } @@ -262,27 +278,27 @@ int main(int argc, char **argv) { if (cmd_info.in_file_name2_.length()) printf("inFile2 is %s\n", cmd_info.in_file_name2_.c_str()); if (cmd_info.out_file_name1_.length()) { bool res = exists_file(cmd_info.out_file_name1_); - if(res){ + if (res) { string tmps; - if(cmd_info.overWrite_){ + if (cmd_info.overWrite_) { tmps = "y"; - }else{ + } else { char tmp[100]; printf("\n"); printf("%s already exists, overwrite it or not ? (y/n)\n", cmd_info.out_file_name1_.c_str()); scanf("%s", tmp); tmps = string(tmp); - while(tmps != "y" && tmps != "n"){ + while (tmps != "y" && tmps != "n") { printf("please input y or n\n"); scanf("%s", tmp); tmps = string(tmp); } } - if(tmps == "y"){ + if (tmps == "y") { remove(cmd_info.out_file_name1_.c_str()); - } else if(tmps == "n"){ + } else if (tmps == "n") { return 0; - } else{ + } else { assert(0); } } @@ -290,27 +306,27 @@ int main(int argc, char **argv) { } if (cmd_info.out_file_name2_.length()) { bool res = exists_file(cmd_info.out_file_name2_); - if(res){ + if (res) { string tmps; - if(cmd_info.overWrite_){ + if (cmd_info.overWrite_) { tmps = "y"; - }else{ + } else { char tmp[100]; printf("\n"); printf("%s already exists, overwrite it or not ? (y/n)\n", cmd_info.out_file_name2_.c_str()); scanf("%s", tmp); tmps = string(tmp); - while(tmps != "y" && tmps != "n"){ + while (tmps != "y" && tmps != "n") { printf("please input y or n\n"); scanf("%s", tmp); tmps = string(tmp); } } - if(tmps == "y"){ + if (tmps == "y") { remove(cmd_info.out_file_name2_.c_str()); - } else if(tmps == "n"){ + } else if (tmps == "n") { return 0; - } else{ + } else { assert(0); } } @@ -322,15 +338,15 @@ int main(int argc, char **argv) { string suffix_name1; string suffix_name2; - if(out_gz){ + if (out_gz) { //printf("===out1 is %s\n", cmd_info.out_file_name1_.c_str()); - if(cmd_info.out_file_name2_.length() > 0){ + if (cmd_info.out_file_name2_.length() > 0) { //printf("===out2 is %s\n", cmd_info.out_file_name2_.c_str()); } //printf("now change out file name...\n"); int real_begin1 = 0; - for(int i = cmd_info.out_file_name1_.length() - 1; i >= 0; i--){ - if(cmd_info.out_file_name1_[i] == '/'){ + for (int i = cmd_info.out_file_name1_.length() - 1; i >= 0; i--) { + if (cmd_info.out_file_name1_[i] == '/') { real_begin1 = i + 1; break; } @@ -340,22 +356,23 @@ int main(int argc, char **argv) { //printf("prefix_name1 %s\n", prefix_name1.c_str()); //printf("suffix_name1 %s\n", suffix_name1.c_str()); cmd_info.out_file_name1_ = prefix_name1 + "tmp_" + to_string(rand()) + suffix_name1; - if(cmd_info.out_file_name2_.length() > 0){ + if (cmd_info.out_file_name2_.length() > 0) { int real_begin2 = 0; - for(int i = cmd_info.out_file_name2_.length() - 1; i >= 0; i--){ - if(cmd_info.out_file_name2_[i] == '/'){ + for (int i = cmd_info.out_file_name2_.length() - 1; i >= 0; i--) { + if (cmd_info.out_file_name2_[i] == '/') { real_begin2 = i + 1; break; } } prefix_name2 = cmd_info.out_file_name2_.substr(0, real_begin2); - suffix_name2 = cmd_info.out_file_name2_.substr(real_begin2, cmd_info.out_file_name2_.length() - real_begin2); + suffix_name2 = cmd_info.out_file_name2_.substr(real_begin2, + cmd_info.out_file_name2_.length() - real_begin2); //printf("prefix_name2 %s\n", prefix_name2.c_str()); //printf("suffix_name2 %s\n", suffix_name2.c_str()); cmd_info.out_file_name2_ = prefix_name2 + "tmp_" + to_string(rand()) + suffix_name2; } //printf("===out1 is %s\n", cmd_info.out_file_name1_.c_str()); - if(cmd_info.out_file_name2_.length() > 0){ + if (cmd_info.out_file_name2_.length() > 0) { //printf("===out2 is %s\n", cmd_info.out_file_name2_.c_str()); } } @@ -395,7 +412,7 @@ int main(int argc, char **argv) { if (umiLoc.empty()) error_exit("You've enabled UMI by (--addUmi), you should specify the UMI location by (--umiLoc)"); if (umiLoc != "index1" && umiLoc != "index2" && umiLoc != "read1" && umiLoc != "read2" && - umiLoc != "per_index" && umiLoc != "per_read") { + umiLoc != "per_index" && umiLoc != "per_read") { error_exit("UMI location can only be index1/index2/read1/read2/per_index/per_read"); } if (cmd_info.in_file_name2_.length() == 0 && (umiLoc == "index2" || umiLoc == "read2")) @@ -429,7 +446,7 @@ int main(int argc, char **argv) { if (cmd_info.use_pugz_) { - if(cmd_info.pugz_threads_ > 8){ + if (cmd_info.pugz_threads_ > 8) { //printf("pugz thread number must <= 8, now set pugz thread number == 8.\n"); cmd_info.pugz_threads_ = 8; } @@ -442,21 +459,23 @@ int main(int argc, char **argv) { // printf("now use %d thread to do QC operations\n", cmd_info.thread_number_); //else // printf("now use %d threads to do QC operations\n", cmd_info.thread_number_); + printf("eval len\n"); int mx_len = Adapter::EvalMaxLen(cmd_info.in_file_name1_); + printf("eval done\n"); //printf("auto detect max seqs len is %d\n", mx_len); cmd_info.seq_len_ = mx_len; - if(cmd_info.adapter_fasta_file_.length() > 0){ + if (cmd_info.adapter_fasta_file_.length() > 0) { printf("loading adatper from %s\n", cmd_info.adapter_fasta_file_.c_str()); cmd_info.adapter_from_fasta_ = Adapter::LoadAdaptersFromFasta(cmd_info.adapter_fasta_file_); sort(cmd_info.adapter_from_fasta_.begin(), cmd_info.adapter_from_fasta_.end()); - for(auto item : cmd_info.adapter_from_fasta_)printf(" --- %s ---\n", item.c_str()); + for (auto item: cmd_info.adapter_from_fasta_)printf(" --- %s ---\n", item.c_str()); } double t_start = GetTime(); if (cmd_info.in_file_name2_.length() || cmd_info.interleaved_in_) { //PE if ((cmd_info.out_file_name1_.length() > 0 && cmd_info.out_file_name2_.length() > 0) || - cmd_info.interleaved_out_) { + cmd_info.interleaved_out_) { cmd_info.write_data_ = true; } @@ -586,7 +605,9 @@ int main(int argc, char **argv) { } if (cmd_info.adapter_seq1_.length() > 0) cmd_info.adapter_len_lim_ = min(cmd_info.adapter_len_lim_, int(cmd_info.adapter_seq1_.length())); + printf("000\n"); SeQc *se_qc = new SeQc(&cmd_info); + printf("111\n"); if (cmd_info.is_TGS_) { se_qc->ProcessSeTGS(); } else { @@ -594,18 +615,18 @@ int main(int argc, char **argv) { } delete se_qc; } - if(out_gz){ + if (out_gz) { string out_name1 = cmd_info.out_file_name1_; out_name1 = out_name1.substr(0, out_name1.find(".gz")); remove(out_name1.c_str()); - if(cmd_info.out_file_name2_.length() > 0){ + if (cmd_info.out_file_name2_.length() > 0) { string out_name2 = cmd_info.out_file_name2_; out_name2 = out_name2.substr(0, out_name2.find(".gz")); remove(out_name2.c_str()); } string init_name1 = prefix_name1 + suffix_name1; rename(cmd_info.out_file_name1_.c_str(), init_name1.c_str()); - if(cmd_info.out_file_name2_.length() > 0){ + if (cmd_info.out_file_name2_.length() > 0) { string init_name2 = prefix_name2 + suffix_name2; rename(cmd_info.out_file_name2_.c_str(), init_name2.c_str()); } diff --git a/src/seqc.cpp b/src/seqc.cpp index 4c4642f..695b4d6 100644 --- a/src/seqc.cpp +++ b/src/seqc.cpp @@ -2,8 +2,20 @@ // Created by ylf9811 on 2021/7/6. // #include "seqc.h" +#include "tuple_spawn.hpp" using namespace std; +struct qc_data { + ThreadInfo **thread_info_; + CmdInfo *cmd_info_; + vector *data_; + vector *pass_data_; + int *cnt; +}; + + +extern void slave_tgsfunc(qc_data *para); + /** * @brief Construct function * @param cmd_info1 : cmd information @@ -18,7 +30,7 @@ SeQc::SeQc(CmdInfo *cmd_info1) { in_is_zip_ = cmd_info1->in_file_name1_.find(".gz") != string::npos; out_is_zip_ = cmd_info1->out_file_name1_.find(".gz") != string::npos; if (cmd_info1->write_data_) { - out_queue_ = new moodycamel::ConcurrentQueue>; + out_queue_ = new moodycamel::ConcurrentQueue>; queueNumNow = 0; queueSizeLim = 1 << 5; if (out_is_zip_) { @@ -56,10 +68,11 @@ SeQc::SeQc(CmdInfo *cmd_info1) { umier_ = new Umier(cmd_info1); } if (cmd_info1->use_pugz_) { - pugzQueue = new moodycamel::ReaderWriterQueue>(1 << 10); + pugzQueue = new moodycamel::ReaderWriterQueue> + (1 << 10); } if (cmd_info1->use_pigz_) { - pigzQueue = new moodycamel::ReaderWriterQueue>; + pigzQueue = new moodycamel::ReaderWriterQueue>; pigzLast.first = new char[1 << 24]; pigzLast.second = 0; } @@ -149,99 +162,117 @@ void SeQc::Read2Chars(neoReference &ref, char *out_data, int &pos) { out_data[pos++] = '\n'; } -/** - * @brief get fastq data chunks from the data queue and do QC for them - * @param thread_info : thread information - * @param fastq_data_pool :a fastq data pool, it will be used to release data chunk - * @param dq : data queue - */ -void SeQc::ConsumerSeFastqTask(ThreadInfo *thread_info, rabbit::fq::FastqDataPool *fastq_data_pool, +void SeQc::ConsumerSeFastqTask(ThreadInfo **thread_infos, rabbit::fq::FastqDataPool *fastq_data_pool, rabbit::core::TDataQueue &dq) { rabbit::int64 id = 0; rabbit::fq::FastqDataChunk *fqdatachunk; - + qc_data para; + para.cnt = new int[slave_num]; + for(int i = 0; i < slave_num; i++) para.cnt[i] = 0; + para.cmd_info_ = cmd_info_; + para.thread_info_ = thread_infos; if (cmd_info_->is_TGS_) { + double t0 = GetTime(); + double tsum = 0; + athread_init(); while (dq.Pop(id, fqdatachunk)) { - vector data; + double tt0 = GetTime(); + vector data; rabbit::fq::chunkFormat(fqdatachunk, data, true); - for (auto item: data) { - thread_info->TGS_state_->tgsStatRead(item, cmd_info_->isPhred64_); - } + //printf("format cost %lf\n", GetTime() - tt0); + tt0 = GetTime(); + para.data_ = &data; + athread_spawn_tupled(slave_tgsfunc, ¶); + athread_join(); + //printf("slave cost %lf\n", GetTime() - tt0); + tsum += GetTime() - tt0; fastq_data_pool->Release(fqdatachunk); } + athread_halt(); + // + int ttsum = 0; + for(int i = 0; i < slave_num; i++) ttsum += para.cnt[i]; + printf("ttsum %d\n", ttsum); + printf("TGS tot cost %lf\n", GetTime() - t0); + printf("TGS slave cost %lf\n", tsum); } else { + int sum_ = 0; while (dq.Pop(id, fqdatachunk)) { - vector data; - vector pass_data; + vector data; + vector pass_data; rabbit::fq::chunkFormat(fqdatachunk, data, true); int out_len = 0; + sum_ += data.size(); + printf("=== %d\n", sum_); for (auto item: data) { - thread_info->pre_state1_->StateInfo(item); - if (cmd_info_->state_duplicate_) { - duplicate_->statRead(item); - } - - if (cmd_info_->add_umi_) { - umier_->ProcessSe(item); - } - bool trim_res = filter_->TrimSeq(item, cmd_info_->trim_front1_, cmd_info_->trim_tail1_); - - if (trim_res && cmd_info_->trim_polyg_) { - PolyX::trimPolyG(item, cmd_info_->trim_poly_len_); - } - - if (trim_res && cmd_info_->trim_polyx_) { - PolyX::trimPolyX(item, cmd_info_->trim_poly_len_); - } - - if (trim_res && cmd_info_->trim_adapter_) { - int res = 0; - bool is_trimmed = false; - if(cmd_info_->detect_adapter1_) { - if (cmd_info_->print_what_trimmed_) { - res = Adapter::TrimAdapter(item, cmd_info_->adapter_seq1_, - thread_info->aft_state1_->adapter_map_, cmd_info_->adapter_len_lim_, false); - } else { - res = Adapter::TrimAdapter(item, cmd_info_->adapter_seq1_, false); - } - if (res) { - is_trimmed = true; - thread_info->aft_state1_->AddTrimAdapter(); - thread_info->aft_state1_->AddTrimAdapterBase(res); - } - } - if(cmd_info_->adapter_from_fasta_.size() > 0) { - if (cmd_info_->print_what_trimmed_) { - res = Adapter::TrimAdapters(item, cmd_info_->adapter_from_fasta_, - thread_info->aft_state1_->adapter_map_, cmd_info_->adapter_len_lim_, false); - } else { - res = Adapter::TrimAdapters(item, cmd_info_->adapter_from_fasta_, false); - } - if (res) { - if(!is_trimmed) thread_info->aft_state1_->AddTrimAdapter(); - thread_info->aft_state1_->AddTrimAdapterBase(res); - } - - } - } - - int filter_res = filter_->ReadFiltering(item, trim_res, cmd_info_->isPhred64_); - if (filter_res == 0) { - thread_info->aft_state1_->StateInfo(item); - thread_info->aft_state1_->AddPassReads(); - if (cmd_info_->write_data_) { - pass_data.push_back(item); - out_len += item.lname + item.lseq + item.lstrand + item.lqual + 4; - } - } else if (filter_res == 1) { - thread_info->aft_state1_->AddFailN(); - } else if (filter_res == 2) { - thread_info->aft_state1_->AddFailShort(); - } else if (filter_res == 3) { - thread_info->aft_state1_->AddFailLong(); - } else if (filter_res == 4) { - thread_info->aft_state1_->AddFailLowq(); - } + //thread_info->pre_state1_->StateInfo(item); + //if (cmd_info_->state_duplicate_) { + // duplicate_->statRead(item); + //} + + //if (cmd_info_->add_umi_) { + // umier_->ProcessSe(item); + //} + //bool trim_res = filter_->TrimSeq(item, cmd_info_->trim_front1_, cmd_info_->trim_tail1_); + + //if (trim_res && cmd_info_->trim_polyg_) { + // PolyX::trimPolyG(item, cmd_info_->trim_poly_len_); + //} + + //if (trim_res && cmd_info_->trim_polyx_) { + // PolyX::trimPolyX(item, cmd_info_->trim_poly_len_); + //} + + //if (trim_res && cmd_info_->trim_adapter_) { + // int res = 0; + // bool is_trimmed = false; + // if (cmd_info_->detect_adapter1_) { + // if (cmd_info_->print_what_trimmed_) { + // res = Adapter::TrimAdapter(item, cmd_info_->adapter_seq1_, + // thread_info->aft_state1_->adapter_map_, + // cmd_info_->adapter_len_lim_, false); + // } else { + // res = Adapter::TrimAdapter(item, cmd_info_->adapter_seq1_, false); + // } + // if (res) { + // is_trimmed = true; + // thread_info->aft_state1_->AddTrimAdapter(); + // thread_info->aft_state1_->AddTrimAdapterBase(res); + // } + // } + // if (cmd_info_->adapter_from_fasta_.size() > 0) { + // if (cmd_info_->print_what_trimmed_) { + // res = Adapter::TrimAdapters(item, cmd_info_->adapter_from_fasta_, + // thread_info->aft_state1_->adapter_map_, + // cmd_info_->adapter_len_lim_, false); + // } else { + // res = Adapter::TrimAdapters(item, cmd_info_->adapter_from_fasta_, false); + // } + // if (res) { + // if (!is_trimmed) thread_info->aft_state1_->AddTrimAdapter(); + // thread_info->aft_state1_->AddTrimAdapterBase(res); + // } + + // } + //} + + //int filter_res = filter_->ReadFiltering(item, trim_res, cmd_info_->isPhred64_); + //if (filter_res == 0) { + // thread_info->aft_state1_->StateInfo(item); + // thread_info->aft_state1_->AddPassReads(); + // if (cmd_info_->write_data_) { + // pass_data.push_back(item); + // out_len += item.lname + item.lseq + item.lstrand + item.lqual + 4; + // } + //} else if (filter_res == 1) { + // thread_info->aft_state1_->AddFailN(); + //} else if (filter_res == 2) { + // thread_info->aft_state1_->AddFailShort(); + //} else if (filter_res == 3) { + // thread_info->aft_state1_->AddFailLong(); + //} else if (filter_res == 4) { + // thread_info->aft_state1_->AddFailLowq(); + //} } if (cmd_info_->write_data_) { @@ -259,7 +290,7 @@ void SeQc::ConsumerSeFastqTask(ThreadInfo *thread_info, rabbit::fq::FastqDataPoo #endif usleep(100); } - + out_queue_->enqueue(make_pair(out_data, out_len)); queueNumNow++; //mylock.unlock(); @@ -392,7 +423,6 @@ void SeQc::PigzTask() { infos[4][tmp_level.length()] = '\0'; - infos[5] = "-f"; infos[6] = "-b"; infos[7] = "4096"; @@ -428,8 +458,8 @@ void SeQc::ProcessSeFastq() { auto *fastqPool = new rabbit::fq::FastqDataPool(32, 1 << 22); rabbit::core::TDataQueue queue1(32, 1); - auto **p_thread_info = new ThreadInfo *[cmd_info_->thread_number_]; - for (int t = 0; t < cmd_info_->thread_number_; t++) { + auto **p_thread_info = new ThreadInfo *[slave_num]; + for (int t = 0; t < slave_num; t++) { p_thread_info[t] = new ThreadInfo(cmd_info_, false); } thread *write_thread; @@ -442,12 +472,9 @@ void SeQc::ProcessSeFastq() { } thread producer( - bind(&SeQc::ProducerSeFastqTask, this, cmd_info_->in_file_name1_, fastqPool, ref(queue1))); - auto **threads = new thread *[cmd_info_->thread_number_]; - for (int t = 0; t < cmd_info_->thread_number_; t++) { - threads[t] = new thread( - bind(&SeQc::ConsumerSeFastqTask, this, p_thread_info[t], fastqPool, ref(queue1))); - } + bind(&SeQc::ProducerSeFastqTask, this, cmd_info_->in_file_name1_, fastqPool, ref(queue1))); + thread consumer( + bind(&SeQc::ConsumerSeFastqTask, this, p_thread_info, fastqPool, ref(queue1))); if (cmd_info_->use_pugz_) { pugzer->join(); } @@ -456,9 +483,7 @@ void SeQc::ProcessSeFastq() { #ifdef Verbose printf("producer cost %.4f\n", GetTime() - t0); #endif - for (int t = 0; t < cmd_info_->thread_number_; t++) { - threads[t]->join(); - } + consumer.join(); #ifdef Verbose printf("consumer cost %.4f\n", GetTime() - t0); #endif @@ -473,10 +498,10 @@ void SeQc::ProcessSeFastq() { printf("all thrad done\n"); printf("now merge thread info\n"); #endif - vector pre_vec_state; - vector aft_vec_state; + vector < State * > pre_vec_state; + vector < State * > aft_vec_state; - for (int t = 0; t < cmd_info_->thread_number_; t++) { + for (int t = 0; t < slave_num; t++) { pre_vec_state.push_back(p_thread_info[t]->pre_state1_); aft_vec_state.push_back(p_thread_info[t]->aft_state1_); } @@ -569,12 +594,10 @@ void SeQc::ProcessSeFastq() { delete fastqPool; - for (int t = 0; t < cmd_info_->thread_number_; t++) { - delete threads[t]; + for (int t = 0; t < slave_num; t++) { delete p_thread_info[t]; } - delete[] threads; delete[] p_thread_info; if (cmd_info_->write_data_) { delete write_thread; @@ -592,33 +615,28 @@ void SeQc::ProcessSeTGS() { auto *fastqPool = new rabbit::fq::FastqDataPool(32, 1 << 22); rabbit::core::TDataQueue queue1(32, 1); - auto **p_thread_info = new ThreadInfo *[cmd_info_->thread_number_]; - for (int t = 0; t < cmd_info_->thread_number_; t++) { + auto **p_thread_info = new ThreadInfo *[slave_num]; + for (int t = 0; t < slave_num; t++) { p_thread_info[t] = new ThreadInfo(cmd_info_, false); } thread producer( - bind(&SeQc::ProducerSeFastqTask, this, cmd_info_->in_file_name1_, fastqPool, ref(queue1))); - auto **threads = new thread *[cmd_info_->thread_number_]; - for (int t = 0; t < cmd_info_->thread_number_; t++) { - threads[t] = new thread( - bind(&SeQc::ConsumerSeFastqTask, this, p_thread_info[t], fastqPool, ref(queue1))); - } + bind(&SeQc::ProducerSeFastqTask, this, cmd_info_->in_file_name1_, fastqPool, ref(queue1))); + thread consumer( + bind(&SeQc::ConsumerSeFastqTask, this, p_thread_info, fastqPool, ref(queue1))); if (cmd_info_->use_pugz_) { pugzer->join(); } producer.join(); - for (int t = 0; t < cmd_info_->thread_number_; t++) { - threads[t]->join(); - } + consumer.join(); #ifdef Verbose printf("all thrad done\n"); printf("now merge thread info\n"); #endif - vector vec_state; + vector vec_state; - for (int t = 0; t < cmd_info_->thread_number_; t++) { + for (int t = 0; t < slave_num; t++) { vec_state.push_back(p_thread_info[t]->TGS_state_); } auto mer_state = TGSStats::merge(vec_state); @@ -637,11 +655,11 @@ void SeQc::ProcessSeTGS() { Repoter::ReportHtmlTGS(srr_name + "_RabbitQCPlus.html", command, mer_state, cmd_info_->in_file_name1_); delete fastqPool; - for (int t = 0; t < cmd_info_->thread_number_; t++) { - delete threads[t]; + //printf("111\n"); + for (int t = 0; t < slave_num; t++) { + //printf("11 %d\n", t); delete p_thread_info[t]; } - - delete[] threads; + //printf("111\n"); delete[] p_thread_info; } diff --git a/src/seqc.h b/src/seqc.h index c7c464f..82cb268 100644 --- a/src/seqc.h +++ b/src/seqc.h @@ -48,7 +48,7 @@ class SeQc { void ProducerSeFastqTask(std::string file, rabbit::fq::FastqDataPool *fastq_data_pool, rabbit::core::TDataQueue &dq); - void ConsumerSeFastqTask(ThreadInfo *thread_info, rabbit::fq::FastqDataPool *fastq_data_pool, + void ConsumerSeFastqTask(ThreadInfo **thread_infos, rabbit::fq::FastqDataPool *fastq_data_pool, rabbit::core::TDataQueue &dq); void WriteSeFastqTask(); diff --git a/src/state.cpp b/src/state.cpp index 9f79fcc..00864ca 100644 --- a/src/state.cpp +++ b/src/state.cpp @@ -16,7 +16,7 @@ #endif #define MODB 20 -#define gcMax 100000 +#define gcMax 100 using namespace std; @@ -105,8 +105,6 @@ State::~State() { } } -#define BB 233ll -#define MM 1000000000000000003ll void State::HashInsert(const char *seq, int len, int eva_len) { uint64_t now = NT64(seq, len); diff --git a/src/tgsstate.cpp b/src/tgsstate.cpp index b57d668..8bfc8aa 100644 --- a/src/tgsstate.cpp +++ b/src/tgsstate.cpp @@ -5,10 +5,12 @@ #include "tgsstate.h" using namespace std; -TGSStats::TGSStats(int minLen) { +TGSStats::TGSStats(int minLen, bool isMerge) { + if(isMerge) mTotalReadsLen = new int[64 << 20]; + else mTotalReadsLen = new int[1 << 20]; + countLenNum = 0; mMinlen = minLen; mHalfMinlen = mMinlen >> 1; - ///mLengths; int size_range = mMinlen >> 1; head_qual_sum = new int64_t[size_range]; tail_qual_sum = new int64_t[size_range]; @@ -71,65 +73,6 @@ void TGSStats::updateTop5(int rlen, double avgQual) { } } -void TGSStats::tgsStatRead(neoReference &ref, bool isPhred64) { - const int rlen = ref.lseq; - const char *seq = reinterpret_cast(ref.base + ref.pseq); - const char *quality = reinterpret_cast(ref.base + ref.pqual); - int size_range = mMinlen >> 1; - size_range = min(size_range, rlen); - int i; - mReadsNum++; - mBasesNum += rlen; - char c1, c2; - mTotalReadsLen.push_back(rlen); - int phredSub = 33; - if (isPhred64) phredSub = 64; - int sumQual = 0; - for (int i = 0; i < rlen; i++) { - int qual = (quality[i] - phredSub); - sumQual += qual; - if (qual >= 5) mBases51015Num[0]++; - if (qual >= 10) mBases51015Num[1]++; - if (qual >= 15) mBases51015Num[2]++; - } - updateTop5(rlen, 1.0 * sumQual / rlen); - //if (rlen > mMinlen) { - //[1] stats lengths - mLengths.push_back(rlen); - //--head - for (i = 0; i < size_range; ++i) { - c1 = seq[i]; - //[2] quality sum - head_qual_sum[i] += (quality[i] - phredSub); - //[3] sequence count - if (c1 == 'A') { - head_seq_pos_count[0][i]++; - } else if (c1 == 'T') { - head_seq_pos_count[1][i]++; - } else if (c1 == 'C') { - head_seq_pos_count[2][i]++; - } else if (c1 == 'G') { - head_seq_pos_count[3][i]++; - } - } - //--tail - for (i = rlen - 1; i >= rlen - size_range; --i) { - c2 = seq[i]; - tail_qual_sum[i - (rlen - size_range)] += (quality[i] - phredSub); - - if (c2 == 'A') { - tail_seq_pos_count[0][i - (rlen - size_range)]++; - } else if (c2 == 'T') { - tail_seq_pos_count[1][i - (rlen - size_range)]++; - } else if (c2 == 'C') { - tail_seq_pos_count[2][i - (rlen - size_range)]++; - } else if (c2 == 'G') { - tail_seq_pos_count[3][i - (rlen - size_range)]++; - } - } - //} -} - void TGSStats::print() { @@ -171,16 +114,15 @@ void TGSStats::print() { } TGSStats *TGSStats::merge(vector &list) { - int i; const int minLen = list[0]->mMinlen; - TGSStats *s = new TGSStats(minLen); + TGSStats *s = new TGSStats(minLen, true); for (TGSStats *ds: list) { - //mLengths - //s->mLengths.push_back(ds->mLengths); - //ds->print(); - s->mLengths.insert(s->mLengths.end(), ds->mLengths.begin(), ds->mLengths.end()); - s->mTotalReadsLen.insert(s->mTotalReadsLen.end(), ds->mTotalReadsLen.begin(), ds->mTotalReadsLen.end()); - for (i = 0; i < (minLen >> 1); ++i) { + //printf("ooo1\n"); + for(int i = 0; i < ds->countLenNum; i++){ + s->mTotalReadsLen[s->countLenNum++] = ds->mTotalReadsLen[i]; + } + //printf("ooo2\n"); + for (int i = 0; i < (minLen >> 1); ++i) { //head_seq s->head_seq_pos_count[0][i] += ds->head_seq_pos_count[0][i]; s->head_seq_pos_count[1][i] += ds->head_seq_pos_count[1][i]; @@ -192,25 +134,28 @@ TGSStats *TGSStats::merge(vector &list) { s->tail_seq_pos_count[2][i] += ds->tail_seq_pos_count[2][i]; s->tail_seq_pos_count[3][i] += ds->tail_seq_pos_count[3][i]; } - for (i = 0; i < (minLen >> 1); ++i) { + //printf("ooo3\n"); + for (int i = 0; i < (minLen >> 1); ++i) { //head_qual s->head_qual_sum[i] += ds->head_qual_sum[i]; //tail_qual s->tail_qual_sum[i] += ds->tail_qual_sum[i]; } + //printf("ooo4\n"); s->mReadsNum += ds->mReadsNum; s->mBasesNum += ds->mBasesNum; s->mBases51015Num[0] += ds->mBases51015Num[0]; s->mBases51015Num[1] += ds->mBases51015Num[1]; s->mBases51015Num[2] += ds->mBases51015Num[2]; + //printf("ooo5\n"); for (int i = 0; i < 5; i++) { s->updateTop5(ds->mTop5QualReads[i].second, ds->mTop5QualReads[i].first); s->updateTop5(ds->mTop5LengReads[i].first, ds->mTop5LengReads[i].second); } + //printf("ooo6\n"); } - //s->head_qual_mean = s->head_qual_sum/s->mLengths.size() return s; } @@ -218,17 +163,17 @@ void TGSStats::CalReadsLens() { int Ppre = 10; mMaxReadsLen = 0; int64_t readLenSum = 0; - for (auto item: mTotalReadsLen) { - mMaxReadsLen = max(mMaxReadsLen, item); - readLenSum += item; + for (int i = 0; i < countLenNum; i++) { + mMaxReadsLen = max(mMaxReadsLen, mTotalReadsLen[i]); + readLenSum += mTotalReadsLen[i]; } - mAvgReadsLen = 1.0 * readLenSum / mTotalReadsLen.size(); + mAvgReadsLen = 1.0 * readLenSum / countLenNum; mMaxReadsLen /= Ppre; mMaxReadsLen += Ppre; readsLens = new int[mMaxReadsLen]; for (int i = 0; i < mMaxReadsLen; i++) readsLens[i] = 0; - for (auto item: mTotalReadsLen) { - readsLens[(item + Ppre - 1) / Ppre]++; + for (int i = 0; i < countLenNum; i++) { + readsLens[(mTotalReadsLen[i] + Ppre - 1) / Ppre]++; } } diff --git a/src/tgsstate.h b/src/tgsstate.h index d926e3a..3564cea 100644 --- a/src/tgsstate.h +++ b/src/tgsstate.h @@ -16,7 +16,7 @@ class TGSStats { public: - TGSStats(int minLen); + TGSStats(int minLen, bool isMerge = false); ~TGSStats(); @@ -24,8 +24,6 @@ class TGSStats { void print(); - void tgsStatRead(neoReference &ref, bool isPhred64); - void CalReadsLens(); bool isLongRead(); @@ -40,14 +38,17 @@ class TGSStats { int base2num(std::string base); -private: +public: int mMinlen; int mHalfMinlen; - std::vector mLengths; + //std::vector mTotalReadsLen; + int *mTotalReadsLen; - std::vector mTotalReadsLen; + int countLenNum; + + int *readsLens; int64_t *head_seq_pos_count[4]; @@ -57,8 +58,6 @@ class TGSStats { int64_t *tail_qual_sum; - int *readsLens; - int mMaxReadsLen; double mAvgReadsLen; diff --git a/src/tuple_spawn.hpp b/src/tuple_spawn.hpp new file mode 100644 index 0000000..e348ac9 --- /dev/null +++ b/src/tuple_spawn.hpp @@ -0,0 +1,71 @@ +#pragma once +#include +#ifdef __sw_slave__ +#include "dma_funcs.hpp" +namespace cxx11_apply +{ + template + struct integer_sequence + { + }; + template + struct GenSeq : GenSeq + { + }; + template + struct GenSeq<0, Is...> + { + typedef integer_sequence type; + }; + template + void __apply(Tt t, Tf f, integer_sequence seq) + { + f(std::get(t)...); + } + template + void apply(void (*F)(Ts...), std::tuple tuple) + { + typename GenSeq::type a; + __apply(tuple, F, a); + } +} + +template +__always_inline void call_tuple(void (*f)(Ts...), Ts ...arg){ + f(arg...); +} +template +__always_inline void __tuple_spawn_proxy(std::tuple *arg) { + cxx11_apply::apply(call_tuple, *arg); +} +template +__attribute__((noinline)) void tuple_spawn_proxy(std::tuple *arg) { + std::tuple ldm_arg; + dma_getn(arg, &ldm_arg, 1); + cxx11_apply::apply(call_tuple, ldm_arg); +} +template +void tmpl_entrance(void (*f)(Ts...)){ + __asm__ __volatile__ ("" + : + :"r"(tuple_spawn_proxy) + : "memory"); +} +#define ATHREAD_VISIBLE(x) template void tmpl_entrance(decltype(x)*); + +#endif +#ifdef __sw_host__ +extern "C"{ +#include +} +#include + +template +extern void slave_tuple_spawn_proxy(std::tuple*); +template +void athread_spawn_tupled(void (*f)(Ts...), Ts ...args) { + auto arg = std::tuple(f, args...); + __real_athread_spawn((void*)slave_tuple_spawn_proxy, &arg); +} + +#endif \ No newline at end of file diff --git a/xlink.py b/xlink.py new file mode 100644 index 0000000..bb3b862 --- /dev/null +++ b/xlink.py @@ -0,0 +1,272 @@ +import traceback +import struct +import sys +import re +import collections + +ELF64_HDR_FMT = "10sHHIQQQIHHHHHH" +ELF64_Hdr = collections.namedtuple('ELF64_Hdr', ['e_ident', 'e_type', 'e_machine', 'e_version', + 'e_entry', 'e_phoff', 'e_shoff', 'e_flags', + 'e_ehsize', 'e_phentsize', 'e_phnum', 'e_shentsize', + 'e_shnum', 'e_shstrndx']) +ELF64_SHDR_FMT = "IIQQQQIIQQ" +ELF64_Shdr = collections.namedtuple('ELF64_Shdr', ['sh_name', 'sh_type', 'sh_flags', 'sh_addr', + 'sh_offset', 'sh_size', 'sh_link', 'sh_info', + 'sh_addralign', 'sh_entsize']) +ELF64_ST_BIND = lambda x: (x & 0xff) >> 4 +ELF64_ST_TYPE = lambda x: x & 0xf +ELF64_ST_INFO = lambda bind, type: bind << 4 | type & 0xf + +STB_LOCAL = 0 # Local symbol */ +STB_GLOBAL = 1 # Global symbol */ +STB_WEAK = 2 # Weak symbol */ +STB_NUM = 3 # Number of defined types. */ +STB_LOOS = 10 # Start of OS-specific */ +STB_GNU_UNIQUE = 10 # Unique symbol. */ +STB_HIOS = 12 # End of OS-specific */ +STB_LOPROC = 13 # Start of processor-specific */ +STB_HIPROC = 15 # End of processor-specific */ + +STT_NOTYPE = 0 # Symbol type is unspecified */ +STT_OBJECT = 1 # Symbol is a data object */ +STT_FUNC = 2 # Symbol is a code object */ +STT_SECTION = 3 # Symbol associated with a section */ +STT_FILE = 4 # Symbol's name is file name */ +STT_COMMON = 5 # Symbol is a common data object */ +STT_TLS = 6 # Symbol is thread-local data object*/ +STT_NUM = 7 # Number of defined types. */ +STT_LOOS = 10 # Start of OS-specific */ +STT_GNU_IFUNC = 10 # Symbol is indirect code object */ +STT_HIOS = 12 # End of OS-specific */ +STT_LOPROC = 13 # Start of processor-specific */ +STT_HIPROC = 15 # End of processor-specific */ + + +#define STN_UNDEF 0 /* End of a chain. */ + +ELF64_SYM_FMT = "IBBHQQ" +ELF64_Sym = collections.namedtuple('ELF64_Sym', ['st_name', 'st_info', 'st_other', 'st_shndx', 'st_value', 'st_size']) + +class ELF64: + def __init__(self, bin): + bin = bytearray(bin) + ehdr = ELF64_Hdr(*struct.unpack(ELF64_HDR_FMT, bin[0:64])) + self.bin = bin + self.ehsize = ehdr.e_ehsize + self.phoff = ehdr.e_phoff + self.phnum = ehdr.e_phnum + self.phentsize = ehdr.e_phentsize + self.shoff = ehdr.e_shoff + self.shnum = ehdr.e_shnum + self.shentsize = ehdr.e_shentsize + + shdr_end = self.shoff + self.shnum * self.shentsize + shdr = list(map(lambda x: ELF64_Shdr(*x), struct.iter_unpack(ELF64_SHDR_FMT, bin[self.shoff : shdr_end]))) + shstrtabhdr = shdr[ehdr.e_shstrndx] + #shstrdict = ELF64.split_strtab( + shstrtab = ELF64.get_section_bin(bin, shstrtabhdr) + self.shstrtab = shstrtab + #print(shstrtab[341:351], len(shstrtab)) + #print("TTT:", shstrtab[341:20]) + self.shdrs = {} + self.ishdrs = [] + for hdr in shdr: + # print(hdr, hdr.sh_name, ELF64.get_str_from_tab(shstrtab, hdr.sh_name)) + self.shdrs[ELF64.get_str_from_tab(shstrtab, hdr.sh_name)] = hdr + self.ishdrs.append(hdr) + symtabhdr = self.shdrs['.symtab'] + symtabbin = ELF64.get_section_bin(bin, symtabhdr) + syms = list(map(lambda x: ELF64_Sym(*x), struct.iter_unpack(ELF64_SYM_FMT, symtabbin))) + strtabhdr = self.shdrs['.strtab'] + #strdict = ELF64.split_strtab( + + strtab = ELF64.get_section_bin(bin, strtabhdr) + self.strtab = strtab + #self.sym_name = {} + self.syms = list(map(lambda sym: sym._replace(st_name = ELF64.get_str_from_tab(strtab, sym.st_name)), syms)) + self.global_syms = {} + for sym in self.syms: + if ELF64_ST_BIND(sym.st_info) in [STB_GLOBAL, STB_WEAK]: + self.global_syms[sym.st_name] = sym + @staticmethod + def get_str_from_tab(tab, index): + return tab[index:].split(b"\0", 1)[0].decode() + + @staticmethod + def get_section_bin(elf_bin, sec_hdr): + return elf_bin[sec_hdr.sh_offset : sec_hdr.sh_offset + sec_hdr.sh_size] +import enum +class SW5Rel(enum.Enum): + NONE = 0 # /* No reloc */ + REFLONG = 1 # /* Direct 32 bit */ + REFQUAD = 2 # /* Direct 64 bit */ + GPREL32 = 3 # /* GP relative 32 bit */ + LITERAL = 4 # /* GP relative 16 bit w/optimization */ + LITUSE = 5 # /* Optimization hint for LITERAL */ + GPDISP = 6 # /* Add displacement to GP */ + BRADDR = 7 # /* PC+4 relative 23 bit shifted */ + HINT = 8 # /* PC+4 relative 16 bit shifted */ + SREL16 = 9 # /* PC relative 16 bit */ + SREL32 = 10 # /* PC relative 32 bit */ + SREL64 = 11 # /* PC relative 64 bit */ + GPRELHIGH = 17 # /* GP relative 32 bit, high 16 bits */ + GPRELLOW = 18 # /* GP relative 32 bit, low 16 bits */ + GPREL16 = 19 # /* GP relative 16 bit */ + COPY = 24 # /* Copy symbol at runtime */ + GLOB_DAT = 25 # /* Create GOT entry */ + JMP_SLOT = 26 # /* Create PLT entry */ + RELATIVE = 27 # /* Adjust by program base */ + TLS_GD_HI = 28 + TLSGD = 29 + TLS_LDM = 30 + DTPMOD64 = 31 + GOTDTPREL = 32 + DTPREL64 = 33 + DTPRELHI = 34 + DTPRELLO = 35 + DTPREL16 = 36 + GOTTPREL = 37 + TPREL64 = 38 + TPRELHI = 39 + TPRELLO = 40 + TPREL16 = 41 + LDMLO = 42 + LDMHI = 43 +RA = 26 +T12 = 27 +GP = 29 +LDI = 0x3e +LDIH = 0x3f +LDL = 0x23 +MANGLE_RE = re.compile(r"_Z(?PN?)(?P\d+)slave_(?P.*)") +def remangle_slave(mangled_name): + m = MANGLE_RE.match(mangled_name) + if m: + remangle = "slave__Z%s%d%s" % (m["NS"], int(m["len"]) - 6, m["rest"]) + return remangle + else: + return None +import subprocess +import os +verbose = False +def process_text1_reloc(elf, ipass): + text1_offset = elf.shdrs['.text1'].sh_offset + text1_addr = elf.shdrs['.text1'].sh_addr + text1_size = elf.shdrs['.text1'].sh_size + got_offset = elf.shdrs['.got'].sh_offset + got_addr = elf.shdrs['.got'].sh_addr + nonliterals = set([]) + if '.rela.text1' in elf.shdrs: + relatext1_offset = elf.shdrs[".rela.text1"].sh_offset + relatext1_size = elf.shdrs[".rela.text1"].sh_size + for i in range(relatext1_offset, relatext1_offset + relatext1_size, 24): + addr, info, off = struct.unpack("> 32 + reltype = SW5Rel(info & 63) + if reltype != SW5Rel.LITERAL: + nonliterals.add(addr) + if ipass == 2: + got1_offset = elf.global_syms['__got1'].st_value - elf.shdrs['.ldm'].sh_addr + elf.shdrs['.ldm'].sh_offset + got1_addr = elf.global_syms['__got1'].st_value - 0x5000004000 + got1_size = elf.global_syms['__got1'].st_size + def get_inst(i): + return struct.unpack('I', elf.bin[i:i+4])[0] + got1_map = {} + for i in range(0, text1_size, 4): + # inst = sw5insts.parse_inst(get_inst(i)) + inst = get_inst(text1_offset + i) + op = inst >> 26 + ra = inst >> 21 & 31 + rb = inst >> 16 & 31 + disp = inst & 65535 + if disp > 32767: + disp -= 65536 + if op == LDIH and ra == GP and rb in (T12, RA, GP): + gpval = i + text1_addr + disp * 65536 + if op == LDI and ra == GP and rb == GP: + gpval = gpval + disp + if op == LDL and rb == GP and i + text1_addr not in nonliterals: + target_addr = gpval + disp + target_offset = target_addr - got_addr + got_offset + val = struct.unpack('Q', elf.bin[target_offset:target_offset + 8])[0] + if val not in got1_map: + got1_map[val] = len(got1_map) + idx = got1_map[val] + if ipass == 2: + elf.bin[got1_offset + idx * 8: got1_offset + idx * 8 + 8] = struct.pack('Q', val) + if ipass == 2: + ldm_disp = got1_addr + got1_map[val] * 8 + inst_new = LDL << 26 | ra << 21 | 31 << 16 | ldm_disp & 65535 + elf.bin[text1_offset + i:text1_offset+i+4] = struct.pack('I', inst_new) + return len(got1_map) +def remangle_defs(elf): + ret = [] + if '.rela.text' in elf.shdrs: + relatext1_offset = elf.shdrs[".rela.text"].sh_offset + relatext1_size = elf.shdrs[".rela.text"].sh_size + for i in range(relatext1_offset, relatext1_offset + relatext1_size, 24): + addr, info, off = struct.unpack("> 32 + reltype = SW5Rel(info & 63) + target_sym = elf.syms[target] + if target_sym.st_shndx < len(elf.ishdrs) and elf.ishdrs[target_sym.st_shndx].sh_type == 0: + if remangle_slave(elf.syms[target].st_name): + ret.append('--defsym=%s=%s' % (elf.syms[target].st_name, remangle_slave(elf.syms[target].st_name))) + return ret +def run(argv_link, add_args=[], **kwargs): + if os.path.basename(argv_link[0]) != 'sw5ld': + add_args = list(map(lambda x: "-Wl,"+x, add_args)) + if (verbose): + print(argv_link + add_args) + retcode = subprocess.run(argv_link + add_args, **kwargs).returncode + if retcode != 0: + print('\033[91mexecution of %s failed\033[0m' % str(argv_link + add_args)) + sys.exit(retcode) +def get_output(argv): + for i, arg in enumerate(argv): + if arg == '-o': + return argv[i+1] + return "./a.out" +def write_elf(path, binary): + open(path, "wb").write(binary) + import os + import stat + st = os.stat(path) + os.chmod(path, st.st_mode | stat.S_IEXEC) +if __name__ == '__main__': + import sys + import tempfile + if len(sys.argv) == 1 or sys.argv[1] == '-h': + print("xlink.py [-v] ", file=sys.stderr) + sys.exit(1) + verbose = False + arg_offset = 1 + if sys.argv[1] == '-v': + verbose = True + arg_offset = 2 + tempdir = tempfile.mkdtemp(prefix='xlink-') + try: + linker = sys.argv[arg_offset] + argv_link = sys.argv[arg_offset:].copy() + dest = get_output(argv_link) + if os.path.exists(dest): + os.remove(dest) + pass1_elf = os.path.join(tempdir, 'pass1_elf') + add_args_pass0 = ['-q', '--unresolved-symbols=ignore-all', '-o', pass1_elf] + run(argv_link, add_args_pass0) + elf1 = ELF64(open(pass1_elf, "rb").read()) + nliterals = process_text1_reloc(elf1, ipass=1) + + got1_c = os.path.join(tempdir, 'got1.c') + open(got1_c, "w").write('__attribute__((section(".ldm"))) long __got1[%d];\n' % nliterals) + run(['sw5gcc', '-mslave', '-c', 'got1.c'], cwd=tempdir) + print("\033[93mGenerating a subset of global offset table in LDM of %dB\033[0m" % (nliterals * 8)) + pass2_elf = os.path.join(tempdir, 'pass2_elf') + add_args_pass2 = [os.path.join(tempdir, 'got1.o'), '-o', pass2_elf, '-q'] + remangle_defs(elf1) + run(argv_link, add_args_pass2) + elf2 = ELF64(open(pass2_elf, "rb").read()) + process_text1_reloc(elf2, ipass=2) + write_elf(dest, elf2.bin) # print(nliterals) + finally: + import shutil + shutil.rmtree(tempdir)