forked from GreenleafLab/ArchR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKmer_Bias.cpp
98 lines (63 loc) · 2.07 KB
/
Kmer_Bias.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include <stdexcept>
#include <string>
#include <Rcpp.h>
using namespace Rcpp;
using namespace std;
// Looks up every length-`n` substring of `str` that starts at offsets
// 0 .. window - 1 in the k-mer table `kmer`.
//
//   str     sequence to scan
//   window  number of start offsets to examine
//   n       maximum substring length taken at each offset (substr returns a
//           shorter piece when fewer than n characters remain)
//   kmer    table of k-mers to match against
//
// Returns one integer per offset as produced by Rcpp's match(); the callers
// in this file treat these as 1-based positions in `kmer`, with NA meaning
// the substring is not in the table.
// std::string::substr throws std::out_of_range if a start offset is greater
// than str.size(), so `str` needs at least window - 1 characters.
// [[Rcpp::export]]
IntegerVector kmerIdxCpp(const std::string& str, const int window, const int n, CharacterVector &kmer){
  CharacterVector candidates( window );
  int start = 0;
  while ( start < window ){
    // Candidate k-mer beginning at this offset
    candidates[start] = str.substr( start, n );
    ++start;
  }
  return match( candidates , kmer );
}
// Tallies, for each k-mer in `kmer`, how often it starts at each of the
// `window` offsets across all sequences in `string_vector`.
//
//   string_vector  sequences to scan
//   strand_vector  one strand code per sequence; 2 marks a minus-stranded
//                  sequence, whose offset j is counted in column
//                  window - j - 1; any other code is counted in column j
//   window         number of start offsets examined per sequence
//   w              k-mer length handed to kmerIdxCpp
//   kmer           k-mer table; also becomes the row names of the result
//
// Returns a kmer.size() x window count matrix. Offsets whose substring is not
// found in `kmer` are skipped.
// Throws std::invalid_argument (surfaced by Rcpp as an R error) when
// strand_vector has fewer elements than string_vector.
// [[Rcpp::export]]
IntegerMatrix kmerPositionFrequencyCpp(StringVector &string_vector, IntegerVector &strand_vector, const int window, const int w, CharacterVector &kmer){
  const int n = string_vector.size();

  // Reject a short strand_vector up front; otherwise strand_vector[i] below
  // would be read past its end.
  if(strand_vector.size() < n){
    throw std::invalid_argument("kmerPositionFrequencyCpp: strand_vector has fewer elements than string_vector");
  }

  // Initialize Matrix
  IntegerMatrix out = IntegerMatrix(kmer.size(), window);
  rownames(out) = kmer;

  std::string str_i;
  for(int i = 0; i < n; i++){
    str_i = string_vector[i];
    // The strand is fixed per sequence, so decide the orientation once
    const bool minus_strand = (strand_vector[i] == 2);

    // Match Kmer over window
    IntegerVector m = kmerIdxCpp(str_i, window, w, kmer);

    for(int j = 0; j < window; j++){
      if(IntegerVector::is_na(m[j])){
        continue; // substring at this offset is not in the k-mer table
      }
      // Minus-stranded sequences are counted at the mirrored offset
      const int col = minus_strand ? (window - j - 1) : j;
      out(m[j] - 1, col) += 1;
    }
  }
  return out;
}
// Tallies, for each k-mer in `kmer`, how many times it is found in the
// sequences belonging to each id.
//
//   string_vector  sequences to scan
//   id_vector      one 1-based id per sequence, selecting the result column
//   n_id           number of ids, i.e. number of result columns
//   window         number of start offsets examined per sequence
//   w              k-mer length handed to kmerIdxCpp
//   kmer           k-mer table; also becomes the row names of the result
//
// Returns a kmer.size() x n_id count matrix. Offsets whose substring is not
// found in `kmer` are skipped.
// Throws std::invalid_argument when id_vector has fewer elements than
// string_vector, and std::out_of_range when an id is NA or outside 1..n_id.
// Rcpp surfaces both as R errors.
// [[Rcpp::export]]
IntegerMatrix kmerIDFrequencyCpp(StringVector &string_vector, IntegerVector &id_vector, const int n_id, const int window, const int w, CharacterVector &kmer){
  const int n = string_vector.size();

  // Reject a short id_vector up front; otherwise id_vector[i] below would be
  // read past its end.
  if(id_vector.size() < n){
    throw std::invalid_argument("kmerIDFrequencyCpp: id_vector has fewer elements than string_vector");
  }

  // Initialize Matrix
  IntegerMatrix out = IntegerMatrix(kmer.size(), n_id);
  rownames(out) = kmer;

  std::string str_i;
  for(int i = 0; i < n; i++){
    str_i = string_vector[i];

    // id_i - 1 is used as a column index, so it must lie inside the matrix.
    // An NA id (R stores NA_integer_ as the smallest int) fails the lower
    // bound and is rejected by the same check.
    const int id_i = id_vector[i];
    if(id_i < 1 || id_i > n_id){
      throw std::out_of_range(
        "kmerIDFrequencyCpp: id_vector element " + std::to_string(i + 1) +
        " is NA or outside 1.." + std::to_string(n_id));
    }

    // Match Kmer over window
    IntegerVector m = kmerIdxCpp(str_i, window, w, kmer);

    // Add Matched Value if not NA
    for(int j = 0; j < window; j++){
      if(IntegerVector::is_na(m[j])){
        continue; // substring at this offset is not in the k-mer table
      }
      out(m[j] - 1, id_i - 1) += 1;
    }
  }
  return out;
}