Commit

first commit
Sunmile committed Apr 16, 2019
0 parents commit 390e1d8
Showing 72 changed files with 23,519 additions and 0 deletions.
11 changes: 11 additions & 0 deletions Feature construction/Feature_code.m
@@ -0,0 +1,11 @@
function code=Feature_code(SR,SD,seq)
% Build the feature vector for each (miRNA, disease) pair in seq by
% concatenating the miRNA similarity row SR(a,:) with the disease
% similarity row SD(b,:). seq is an m-by-2 matrix, one pair per row.
[m,n]=size(seq);
code=[];
for i=1:m
    a=seq(i,1);    % miRNA index (the original read all_negative(i,1), which is undefined inside this function)
    b=seq(i,2);    % disease index
    code_miRNA_disease=[SR(a,:),SD(b,:)];
    code=[code;code_miRNA_disease];
end
end
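A minimal usage sketch with toy matrices (all sizes and values below are illustrative, not from the repository); each output row concatenates one miRNA similarity row with one disease similarity row:

SR=rand(3,4);                  % toy miRNA similarity matrix (3 miRNAs)
SD=rand(2,5);                  % toy disease similarity matrix (2 diseases)
pairs=[1 1; 2 2; 3 1];         % one (miRNA index, disease index) pair per row
code=Feature_code(SR,SD,pairs);
size(code)                     % 3-by-9: each row is [SR(a,:),SD(b,:)]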
24 changes: 24 additions & 0 deletions Feature construction/Feature_extraction.m
@@ -0,0 +1,24 @@
function feature=Feature_extraction(Positive,Negative) % samples with labels
% Rank feature columns by a frequency-based score and keep the top 100
% miRNA features (columns 1..2204) plus the top 100 disease features.
[m1,n1]=size(Positive);
[m2,n2]=size(Negative);
csp=zeros(n1,1);   % column sums over the positive samples
csn=zeros(n2,1);   % column sums over the negative samples
sps=zeros(n1,1);   % per-feature score
for i=1:n1
    csp(i)=sum(Positive(:,i));
    csn(i)=sum(Negative(:,i));
    sps(i)=(csp(i)+csn(i))*log(m1/csp(i)+m2/csn(i));
end
[spssort,index]=sort(sps,'descend');   % feature indices, best score first
[m,n]=size(index);
micfeature=[];
disfeature=[];
for j=1:m
    if 1<=index(j) && index(j)<=2204   % miRNA-derived feature columns
        micfeature=[micfeature;index(j)];
    else                               % disease-derived feature columns
        disfeature=[disfeature;index(j)];
    end
end
feature=[micfeature(1:100);disfeature(1:100)];
end
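For intuition about the score sps, here is a hand computation under assumed counts (m1, m2, csp, csn below are illustrative, not repository values); note MATLAB's log is the natural logarithm:

m1=100; m2=100;                    % positives / negatives per class
csp=50;  csn=10;                   % a class-skewed column
sps=(csp+csn)*log(m1/csp+m2/csn)   % 60*log(12) ≈ 149.1
csp=99;  csn=99;                   % a near-constant column
sps=(csp+csn)*log(m1/csp+m2/csn)   % 198*log(2.02) ≈ 139.2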
43 changes: 43 additions & 0 deletions Predictors/IMPMD/linear_regression.m
@@ -0,0 +1,43 @@
% Ensemble step: combine the SVM, KNN, and logistic-regression scores with
% linear regression, then evaluate the combined predictor.
[m1,n1]=size(Positive_200);
[m2,n2]=size(Negative_200);
Y=[ones(m1,1);zeros(m2,1)]; Y=Y(rank);   % labels, permuted by rank
feature=[prdY_svm,prdY_knn,prdY_reg];    % base-predictor scores as features
[m3,n3]=size(feature);
b=regress(Y,feature);                    % least-squares combination weights
prdY=zeros(m3,1);
for i=1:size(feature,1)
    prdY(i)=b'*feature(i,:)';
end
predictedY=zeros(m3,1);
for i=1:length(prdY)
    if prdY(i)>=0.5                      % adjustable decision threshold
        predictedY(i)=1;
    else
        predictedY(i)=-1;
    end
end
%%%%%%%%%%%%%%%%%% ROC curve
targets=Y;
targets1=[];
targets1=[targets1;Y];
zz=find(targets1==0);
for i=1:length(zz)
    targets1(zz(i))=-1;                  % {-1,1} copy for the confusion counts
end
[tpr,fpr,thresholds]=roc(targets',prdY');
AUCbb=AUCwang(tpr,fpr);
hold on, plot(fpr,tpr)

% Confusion-matrix counts and summary metrics
P=size(find(targets1==1),1);
TP=size(find(predictedY==targets1 & predictedY==1),1);
FN=size(find(predictedY~=targets1 & targets1==1),1);
N=size(find(targets1==-1),1);
TN=size(find(predictedY==targets1 & predictedY==-1),1);
FP=size(find(predictedY~=targets1 & targets1==-1),1);

acc=length(find(predictedY==targets1))/length(targets1);
sen=TP/P;
spe=TN/N;
mcc=(TP*TN-FP*FN)/sqrt((TP+FN)*(TP+FP)*(TN+FP)*(TN+FN));
auc=AUCbb;
pre=TP/(TP+FP);
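This script assumes prdY_svm, prdY_knn, and prdY_reg already sit in the workspace as per-sample scores from the three base predictors. A self-contained sketch of the same stacking step on synthetic scores (all values below are made up for illustration):

Y=[ones(50,1);zeros(50,1)];          % toy labels
prdY_svm=Y+0.2*randn(100,1);         % three noisy base-predictor scores
prdY_knn=Y+0.3*randn(100,1);
prdY_reg=Y+0.4*randn(100,1);
feature=[prdY_svm,prdY_knn,prdY_reg];
b=regress(Y,feature);                % least-squares ensemble weights
prdY=feature*b;                      % combined score per sample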
27 changes: 27 additions & 0 deletions Predictors/KNN/KNN.m
@@ -0,0 +1,27 @@
function PredictY=KNN(K,trainset,testset)
% K-nearest-neighbor scorer. The last column of trainset/testset holds the
% {1,-1} label; PredictY(i) is the fraction of the K nearest training
% samples (Euclidean distance on the feature columns) that are labeled 1.
[trainm,trainn]=size(trainset);
[testm,~]=size(testset);
PredictY=zeros(testm,1);
distancev=zeros(trainm,1);
for i=1:testm
    for j=1:trainm
        distancev(j)=0;
        for k=1:trainn-1
            distancev(j)=distancev(j)+(testset(i,k)-trainset(j,k))^2;
        end
        distancev(j)=sqrt(distancev(j));
    end
    [~,val]=sort(distancev);   % training-sample indices, nearest first
    val=val(1:K);
    class1=0;                  % positive neighbors
    class2=0;                  % negative neighbors
    for k=1:size(val,1)
        if trainset(val(k),end)==1
            class1=class1+1;
        else
            class2=class2+1;
        end
    end
    PredictY(i)=class1/K;      % score = positive fraction among K neighbors
end
end
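A minimal sketch of calling KNN on toy data (assumed layout: feature columns first, {1,-1} label in the last column; the values are illustrative):

trainset=[randn(20,3)+1, ones(20,1); randn(20,3)-1, -ones(20,1)];
testset =[randn(5,3)+1,  ones(5,1);  randn(5,3)-1,  -ones(5,1)];
scores=KNN(3,trainset,testset)   % fraction of the 3 nearest neighbors labeled 1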
70 changes: 70 additions & 0 deletions Predictors/KNN/Kfold_KNN.m
@@ -0,0 +1,70 @@
function [acc,mcc,sen,spe,auc,pre,prdY,fpr,tpr,predictedY]=Kfold_KNN(rank,Xpos,Xneg,pth,cv,K)
% K-fold cross-validation of the KNN scorer.
% Example: [acc,mcc,sen,spe,auc,pre,prdY,fpr,tpr,predictedY]=Kfold_KNN(randperm(60)',Positive_200(1:30,:),Negative_200(1:30,:),0.5,10,3);
[m1,n1]=size(Xpos);
[m2,n2]=size(Xneg);
X=[Xpos;Xneg];
Y=[ones(m1,1);-ones(m2,1)];
[m3,n3]=size(X);
X=X(rank,:);                      % shuffle samples with the supplied permutation
Y=Y(rank);
K_neighbor=K;
prdY=[];
predictedY=[];
targets=[];
kfold=cv;
t=fix(m3/kfold);                  % base fold size; the last fold takes the remainder
for k=1:kfold
    if k==kfold
        train_data=X(1:t*(k-1),:);
        test_data=X(t*(k-1)+1:m3,:);
        train_target=Y(1:t*(k-1),:);
        test_target=Y(t*(k-1)+1:m3,:);
    else
        train_data=[X(1:t*(k-1),:);X(t*k+1:m3,:)];
        test_data=X(t*(k-1)+1:t*k,:);
        train_target=[Y(1:t*(k-1));Y(t*k+1:m3)];
        test_target=Y(t*(k-1)+1:t*k,:);
    end
    prdY_k=KNN(K_neighbor,[train_data,train_target],[test_data,test_target]);
    prdY=[prdY;prdY_k];
end

predictedY=zeros(length(prdY),1);
for i=1:length(prdY)
    if prdY(i)>=pth               % decision threshold on the KNN score
        predictedY(i)=1;
    else
        predictedY(i)=-1;
    end
end
%%%%%%%%%%%%%%%%%% ROC curve
targets=Y;
targets1=Y;                       % {-1,1} copy for the confusion counts
zz=find(targets==-1);
for i=1:length(zz)
    targets(zz(i))=0;             % roc() expects {0,1} targets
end
[tpr,fpr,thresholds]=roc(targets',prdY');
AUCbb=AUCwang(tpr,fpr);
hold on, plot(fpr,tpr)

% Confusion-matrix counts and summary metrics
P=size(find(targets1==1),1);
TP=size(find(predictedY==targets1 & predictedY==1),1);
FN=size(find(predictedY~=targets1 & targets1==1),1);
N=size(find(targets1==-1),1);
TN=size(find(predictedY==targets1 & predictedY==-1),1);
FP=size(find(predictedY~=targets1 & targets1==-1),1);

acc=length(find(predictedY==targets1))/length(targets1);
sen=TP/P;
spe=TN/N;
mcc=(TP*TN-FP*FN)/sqrt((TP+FN)*(TP+FP)*(TN+FP)*(TN+FN));
auc=AUCbb;
pre=TP/(TP+FP);
end
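The manual split assigns rows t*(k-1)+1 .. t*k to fold k and lets the last fold absorb the remainder. A quick check of the index arithmetic (m3 and cv below are chosen for illustration):

m3=23; kfold=5; t=fix(m3/kfold);   % t = 4
for k=1:kfold
    if k==kfold
        testidx=t*(k-1)+1:m3;      % last fold takes rows 17..23
    else
        testidx=t*(k-1)+1:t*k;     % folds 1..4 take 4 rows each
    end
    disp(testidx)
end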

90 changes: 90 additions & 0 deletions Predictors/LR/Kfold_regression.m
@@ -0,0 +1,90 @@
function [acc,mcc,sen,spe,auc,pre,prdY,fpr,tpr,predictedY]=Kfold_regression(Xpos,Xneg,pth,cv,alpha,threshold,maxTimes)
% K-fold cross-validation of logistic regression trained by gradient descent.

[m1,n1]=size(Xpos);
[m2,n2]=size(Xneg);
X=[Xpos;Xneg];
Y=[ones(m1,1);-ones(m2,1)];
[m3,n3]=size(X);
prdY=[];
targets=[];
predictedY=[];
indices=crossvalind('Kfold',X(1:m3,n3),cv);   % random fold assignment
for k=1:cv
    test=(indices==k);
    train=~test;
    train_data=X(train,:);
    train_target=Y(train,:);
    test_data=X(test,:);
    test_target=Y(test,:);
    trainset=[train_data,train_target];
    testset=[test_data,test_target];
    dataSize=size(trainset);
    dataLen=dataSize(1);
    paramLen=dataSize(2);
    maxIndex=paramLen-1;                      % number of feature columns
    theta=zeros(paramLen,1);
    times=0;
    cost0=0;
    cost1=1;
    % Gradient descent until the cost change falls below threshold or maxTimes is hit
    while abs(cost1-cost0)>threshold && times<maxTimes
        times=times+1;
        theta0=theta;
        cost0=LogisticRegressionCostFun(theta,trainset);
        for i=1:dataLen
            tmp=((1/(1+exp(-theta0'*[1,trainset(i,1:maxIndex)]')))-trainset(i,paramLen))/dataLen;
            theta(1)=theta(1)-alpha*tmp;      % intercept update
            for j=2:paramLen
                theta(j)=theta(j)-alpha*tmp*trainset(i,j-1);
            end
        end
        cost1=LogisticRegressionCostFun(theta,trainset);
    end
    [m,n]=size(testset);
    prdY_k=zeros(m,1);                        % (was predY_k in the original, mismatching the append below)
    for kk=1:m
        x=testset(kk,1:maxIndex);
        prdY_k(kk)=1/(1+exp(-theta'*[1,x]'));  % sigmoid score on the test fold
    end
    prdY=[prdY;prdY_k];
    targets=[targets;test_target];            % keep labels in fold order to match prdY (the original referenced an undefined Ytest)
end
predictedY=zeros(length(prdY),1);
for i=1:length(prdY)
    if prdY(i)>=pth                           % decision threshold
        predictedY(i)=1;
    else
        predictedY(i)=-1;
    end
end
%%%%%%%%%%%%%%%%%% ROC curve
targets1=targets;                             % {-1,1} copy for the confusion counts
zz=find(targets==-1);
for i=1:length(zz)
    targets(zz(i))=0;                         % roc() expects {0,1} targets
end
[tpr,fpr,thresholds]=roc(targets',prdY');
AUCbb=AUCwang(tpr,fpr);
hold on, plot(fpr,tpr)

% Confusion-matrix counts and summary metrics
P=size(find(targets1==1),1);
TP=size(find(predictedY==targets1 & predictedY==1),1);
FN=size(find(predictedY~=targets1 & targets1==1),1);
N=size(find(targets1==-1),1);
TN=size(find(predictedY==targets1 & predictedY==-1),1);
FP=size(find(predictedY~=targets1 & targets1==-1),1);

acc=length(find(predictedY==targets1))/length(targets1);
sen=TP/P;
spe=TN/N;
mcc=(TP*TN-FP*FN)/sqrt((TP+FN)*(TP+FP)*(TN+FP)*(TN+FN));
auc=AUCbb;
pre=TP/(TP+FP);
end
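A hedged usage sketch on toy data (the hyperparameters below are illustrative starting points, not values from the paper; crossvalind requires the Bioinformatics Toolbox):

Xpos=randn(60,10)+0.5;               % toy positive samples
Xneg=randn(60,10)-0.5;               % toy negative samples
[acc,mcc,sen,spe,auc]=Kfold_regression(Xpos,Xneg,0.5,5,0.1,1e-5,1000);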

18 changes: 18 additions & 0 deletions Predictors/LR/LogisticRegressionCostFun.m
@@ -0,0 +1,18 @@
function cost=LogisticRegressionCostFun(theta,data)
% Average logistic-regression (cross-entropy) cost over the data set.
% Each row of data is [features, label]; a leading 1 is prepended so that
% theta(1) acts as the intercept.

paramLen=length(theta);
X=zeros(paramLen,1);
dataSize=size(data);
dataLen=dataSize(1);
cost=0;
for i=1:dataLen
    X(1)=1;                   % intercept term
    for k=1:paramLen-1
        X(k+1)=data(i,k);
    end
    cost=cost+(-data(i,dataSize(2))*log(1/(1+exp(-(theta'*X))))-(1-data(i,dataSize(2)))*log(1-1/(1+exp(-(theta'*X)))));
end
cost=cost/dataLen;
end
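The loop above computes the mean cross-entropy; a vectorized equivalent can serve as a sanity check (this reformulation is mine, not part of the repository):

dataLen=size(data,1);
Xmat=[ones(dataLen,1), data(:,1:end-1)];   % prepend the intercept column
y=data(:,end);
h=1./(1+exp(-Xmat*theta));                 % sigmoid scores
cost=mean(-y.*log(h)-(1-y).*log(1-h));     % should match the loop result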
48 changes: 48 additions & 0 deletions Predictors/LR/logisticRegression.m
@@ -0,0 +1,48 @@
% Alternative kept for reference: glmfit from the Statistics Toolbox fits
% the same model directly.
% function theta=logisticRegression(x,y)
% [m,n]=size(x);
% theta = glmfit(x, [y ones(m,1)], 'binomial', 'link', 'logit')
% end
function [theta,predictY]=logisticRegression(trainset,testset,maxIndex,alpha,threshold,maxTimes)
% Logistic regression trained by gradient descent.
% trainset/testset: rows of [features, label]; maxIndex: number of features;
% alpha: learning rate; threshold: convergence tolerance on the cost change;
% maxTimes: iteration cap.
%
% Example values from the original comments:
% data = [0 0 0; 0 1 0; 0 1.5 0; 0.5 0.5 0; 0.5 1 0; 1 0.95 0; 0.5 1.4 0; 1.5 0.51 0; 2 0 0;
%         1.9 0 0; 0 3 1; 0 2.1 1; 0.5 1.8 1; 0.8 1.5 1; 1 1.2 1; 1.5 2 1; 3 0 1; 3 1 1; 2 2 1;
%         3 4 1; 1.8 0.5 1];
% maxIndex = 2; alpha = 0.1; threshold = 0.00001; maxTimes = 1000;

dataSize=size(trainset);
dataLen=dataSize(1);
param=maxIndex+1;                 % intercept plus one weight per feature
theta=zeros(param,1);
times=0;
cost0=0;
cost1=1;
while abs(cost1-cost0)>threshold && times<maxTimes
    times=times+1;
    theta0=theta;
    cost0=LogisticRegressionCostFun(theta,trainset);
    for i=1:dataLen
        % residual of the sigmoid prediction for sample i, scaled by 1/dataLen
        tmp=((1/(1+exp(-theta0'*[1,trainset(i,1:maxIndex)]')))-trainset(i,param))/dataLen;
        theta(1)=theta(1)-alpha*tmp;
        for j=2:param
            theta(j)=theta(j)-alpha*tmp*trainset(i,j-1);
        end
    end
    cost1=LogisticRegressionCostFun(theta,trainset);
end
[m,n]=size(testset);
predictY=zeros(m,1);
for k=1:m
    x=testset(k,1:maxIndex);
    predictY(k)=1/(1+exp(-theta'*[1,x]'));   % sigmoid score per test sample
end
end
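A usage sketch wired up from the example values already commented in the file (training and scoring on the same toy set, purely for illustration):

data=[0 0 0; 0 1 0; 0 1.5 0; 0.5 0.5 0; 0.5 1 0; 1 0.95 0; 0.5 1.4 0; 1.5 0.51 0; 2 0 0;
      1.9 0 0; 0 3 1; 0 2.1 1; 0.5 1.8 1; 0.8 1.5 1; 1 1.2 1; 1.5 2 1; 3 0 1; 3 1 1; 2 2 1;
      3 4 1; 1.8 0.5 1];
[theta,predictY]=logisticRegression(data,data,2,0.1,0.00001,1000);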
7 changes: 7 additions & 0 deletions Predictors/SVM/AUCwang.m
@@ -0,0 +1,7 @@
function AUCbb=AUCwang(tpr,fpr)
% Trapezoidal-rule area under the ROC curve: rectangles r(i) plus
% triangular corrections s(i) between consecutive ROC points.
r=zeros(1,length(tpr)-1);
s=zeros(1,length(tpr)-1);
for i=1:length(tpr)-1
    r(i)=(fpr(i+1)-fpr(i))*tpr(i);
    s(i)=0.5*(fpr(i+1)-fpr(i))*(tpr(i+1)-tpr(i));
end
AUCbb=sum(r)+sum(s);
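A quick sanity check on hand-picked ROC points (toy values): a perfect curve should score 1 and the chance diagonal 0.5.

fpr=[0 0 1]; tpr=[0 1 1];      % a perfect classifier's ROC corner points
AUCwang(tpr,fpr)               % returns 1
fpr=[0 1];   tpr=[0 1];        % chance diagonal
AUCwang(tpr,fpr)               % returns 0.5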
18 changes: 18 additions & 0 deletions Predictors/SVM/Change_format.m
@@ -0,0 +1,18 @@
function Change_format(X,file)
% X is the original data; file is the output filename (note: it must be a char!).
% The first column of X is the sample label, so each sample has n-1 feature
% dimensions, written as "index: value" pairs in a LIBSVM-style layout.

[m,n]=size(X);
filename=file;
fid=fopen(filename,'w');
for i=1:m
    fprintf(fid,'%5d ',X(i,1));                  % label
    for j=2:n
        fprintf(fid,'%5d: %12.4f ',j-1,X(i,j));  % index: value pairs
    end
    fprintf(fid,'\n');
end

fclose(fid);
clear fid i j;
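A minimal call, assuming write permission in the working directory (the filename and values are illustrative):

X=[1 0.5 2.0; -1 1.5 0.25];    % label in column 1, features after
Change_format(X,'toy.txt');    % each line: "label  1: feat1  2: feat2 ..."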