Skip to content

Commit

Permalink
fixed SequenceLib::Read
Browse files: browse the repository at this point in the history
  • Loading branch information
voutcn committed Jun 30, 2019
1 parent 897614a commit e7e9c11
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 28 deletions.
6 changes: 4 additions & 2 deletions src/kmlib/kmsort.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace internal {

template <class RandomIt, class ValueType, class RadixTraits>
inline void insert_sort_core(RandomIt s, RandomIt e, RadixTraits rt) {
for (RandomIt i = s + 1; i < e; ++i) {
for (RandomIt i = s + 1; i != e; ++i) {
if (rt(*i, *(i - 1))) {
RandomIt j;
ValueType tmp = *i;
Expand Down Expand Up @@ -97,7 +97,9 @@ inline void radix_sort_core(RandomIt s, RandomIt e, RadixTrait rt, int byte_inde

template <class RandomIt, class ValueType, class RadixTraits>
inline void radix_sort_entry(RandomIt s, RandomIt e, ValueType *, RadixTraits radix_traits) {
if (e - s <= kmsortconst::kInsertSortThreshold) {
if (std::distance(s, e) <= 1) {
return;
} else if (std::distance(s, e) <= kmsortconst::kInsertSortThreshold) {
insert_sort_core<RandomIt, ValueType, RadixTraits>(s, e, radix_traits);
} else {
const int kByteIndexEnd = RadixTraits::n_bytes > 8 ? RadixTraits::n_bytes - 8 : 0;
Expand Down
17 changes: 9 additions & 8 deletions src/sequence/io/async_sequence_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "sequence/io/contig/contig_reader.h"
#include "sequence/sequence_package.h"

template <class PackageType>
template<class PackageType>
class BaseAsyncSequenceReader {
public:
using package_type = PackageType;
Expand Down Expand Up @@ -43,22 +43,23 @@ class BaseAsyncSequenceReader {

class AsyncSequenceReader : public BaseAsyncSequenceReader<SeqPackage> {
public:
explicit AsyncSequenceReader(BaseSequenceReader *reader): reader_(reader) {
explicit AsyncSequenceReader(BaseSequenceReader *reader, bool reverse = false)
: reader_(reader), reverse_(reverse) {
AsyncReadNextBatch();
}
~AsyncSequenceReader() override { StopReading(); }

protected:
void ReadOneBatch(SeqPackage *seq_pkg) override {
int64_t kMaxNumReads = 1u << 22;
int64_t kMaxNumBases = 1u << 28;
bool reverse = false;
int64_t kMaxNumReads = 1u << 22u;
int64_t kMaxNumBases = 1u << 28u;
seq_pkg->Clear();
reader_->Read(seq_pkg, kMaxNumReads, kMaxNumBases, reverse);
reader_->Read(seq_pkg, kMaxNumReads, kMaxNumBases, reverse_);
}

private:
BaseSequenceReader *reader_;
bool reverse_;
};

class AsyncContigReader : public BaseAsyncSequenceReader<std::pair<SeqPackage, std::vector<float>>> {
Expand All @@ -73,8 +74,8 @@ class AsyncContigReader : public BaseAsyncSequenceReader<std::pair<SeqPackage, s
void ReadOneBatch(package_type *pkg) override {
pkg->first.Clear();
pkg->second.clear();
const int64_t kMaxNumContigs = 1u << 22;
const int64_t kMaxNumBases = 1u << 28;
const int64_t kMaxNumContigs = 1u << 22u;
const int64_t kMaxNumBases = 1u << 28u;
const bool reverse = false;
reader_.ReadWithMultiplicity(&pkg->first, &pkg->second, kMaxNumContigs, kMaxNumBases, reverse);
}
Expand Down
24 changes: 11 additions & 13 deletions src/sequence/io/sequence_lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,39 +41,37 @@ void SequenceLibCollection::Build(const std::string &lib_file, const std::string
xfatal("Valid types: pe, se, interleaved\n");
}

int64_t start = total_reads;
int64_t num_read = 0;
int64_t begin_index = total_reads;
unsigned max_read_len = 0;

AsyncSequenceReader async_reader(reader.get());

while (true) {
auto &seq_batch = async_reader.Next();
const auto &seq_batch = async_reader.Next();
if (seq_batch.seq_count() == 0) {
break;
}

total_reads += num_read;
total_reads += seq_batch.seq_count();
total_bases += seq_batch.base_count();
seq_batch.WriteSequences(bin_file);
max_read_len = std::max(max_read_len, seq_batch.max_length());
seq_batch.Clear();
}

if (type == "pe" && (total_reads - start) % 2 != 0) {
xerr("PE library number of reads is odd: {}!\n", total_reads - start);
xfatal("File(s): {}\n", metadata.c_str());
if (type == "pe" && (total_reads - begin_index) % 2 != 0) {
xerr("PE library number of reads is odd: {}!\n", total_reads - begin_index);
xfatal("File(s): {s}\n", metadata.c_str());
}

if (type == "interleaved" && (total_reads - start) % 2 != 0) {
xerr("PE library number of reads is odd: {}!\n", total_reads - start);
xfatal("File(s): {}\n", metadata.c_str());
if (type == "interleaved" && (total_reads - begin_index) % 2 != 0) {
xerr("PE library number of reads is odd: {}!\n", total_reads - begin_index);
xfatal("File(s): {s}\n", metadata.c_str());
}

xinfo("Lib {} ({s}): {s}, {} reads, {} max length\n", libs.size(), metadata.c_str(), type.c_str(),
total_reads - start, max_read_len);
total_reads - begin_index, max_read_len);

libs.emplace_back(nullptr, start, total_reads, max_read_len, type != "se", metadata);
libs.emplace_back(nullptr, begin_index, total_reads, max_read_len, type != "se", metadata);
std::getline(lib_config, metadata); // eliminate the "\n"
}

Expand Down
2 changes: 1 addition & 1 deletion src/sequence/sequence_package.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ class SequencePackage {
pos_to_id_.push_back(seq_count());
}

void WriteSequences(std::ostream &os, int64_t from = 0, int64_t to = -1) {
void WriteSequences(std::ostream &os, int64_t from = 0, int64_t to = -1) const {
if (to == -1) {
to = seq_count() - 1;
}
Expand Down
2 changes: 1 addition & 1 deletion src/sorting/read_to_sdbg_s1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ Read2SdbgS1::MemoryStat Read2SdbgS1::Initialize() {

for (int i = 0; i < seq_pkg_->n_mercy_files; ++i) {
auto file_name = opt_.output_prefix + ".mercy_cand." + std::to_string(i);
mercy_files_.emplace_back(xfopen(file_name.c_str(), "wb"));
mercy_files_.push_back(xfopen(file_name.c_str(), "wb"));
}

// --- initialize stat ---
Expand Down
2 changes: 1 addition & 1 deletion src/sorting/read_to_sdbg_s2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ Read2SdbgS2::MemoryStat Read2SdbgS2::Initialize() {
mercy_cand.insert(mercy_cand.end(), buf, buf + size_read / sizeof(uint64_t));
}

xinfo("Mercy file: {}, {}\n", file_name.c_str(), mercy_cand.size());
xinfo("Mercy file: {s}, {}\n", file_name.c_str(), mercy_cand.size());
kmlib::kmsort(mercy_cand.begin(), mercy_cand.end());

// multi threading
Expand Down
4 changes: 2 additions & 2 deletions src/utils/buffered_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class BufferedReader {
public:
static constexpr size_t kMaxBufferSize = 65536;
explicit BufferedReader() = default;
void reset(std::ifstream *is, size_t buffer_size = kMaxBufferSize) {
void reset(std::istream *is, size_t buffer_size = kMaxBufferSize) {
is_ = is;
head_ = tail_ = 0;
buffer_size_ = std::min(buffer_size, kMaxBufferSize * 1);
Expand Down Expand Up @@ -57,7 +57,7 @@ class BufferedReader {
}

private:
std::ifstream *is_{};
std::istream *is_{};
char buffer_[kMaxBufferSize]{};
size_t buffer_size_{kMaxBufferSize};
size_t head_{0};
Expand Down

0 comments on commit e7e9c11

Please sign in to comment.