-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_process.m
64 lines (49 loc) · 2.38 KB
/
data_process.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
function [label, feature, ub, lb, POPULATION] = data_process(end_gen, percentage, Pop2)
% DATA_PROCESS  Collect per-generation results, de-duplicate them, derive new
% variable bounds from the best individuals and return shuffled training data.
% Original version by Y. Xu (Dec. 18, 2023).
%
% Inputs
%   end_gen    - number of generation files to read; files are expected at
%                'result/gen_0001.mat' ... 'result/gen_<end_gen>.mat' and must
%                contain the variables Score_gen and Population_gen.
%   percentage - fraction (e.g. 0.3) of the lowest-score samples used to
%                compute the new bounds.
%   Pop2       - number of lowest-score unique individuals to return.
%
% Outputs
%   label      - scores of the unique individuals, in random order (column).
%   feature    - unique individuals (one per row), same random order as label.
%   ub, lb     - per-column maximum / minimum over the top-percentage samples.
%   POPULATION - the Pop2 unique individuals with the lowest scores.
%
% Side effect: reseeds the global random number generator via rng('shuffle').

    %% Read data from all generation files
    gens = 1:end_gen;
    score_parts = cell(numel(gens), 1);
    population_parts = cell(numel(gens), 1);
    for k = 1:numel(gens)
        file_path = sprintf('result/gen_%.4d.mat', gens(k));
        loaded_data = load(file_path);
        % Force a column so that row-vector scores concatenate correctly.
        score_parts{k} = loaded_data.Score_gen(:);
        population_parts{k} = loaded_data.Population_gen;
    end
    % Concatenate once instead of growing the arrays inside the loop.
    all_Score_gen = vertcat(score_parts{:});
    all_Population_gen = vertcat(population_parts{:});

    %% Sort all samples by ascending score
    [sorted_scores, sorted_indices] = sort(all_Score_gen);
    sorted_population = all_Population_gen(sorted_indices, :);

    %% Data cleaning: keep one entry per unique individual
    [population, ~, ic] = unique(sorted_population, 'rows');
    n_unique = size(population, 1);
    scores = zeros(n_unique, 1);
    % For duplicated rows the indices in ic repeat, so only one of the
    % duplicate scores is kept per individual.
    % NOTE(review): with ascending sorted_scores this appears to keep the
    % largest score of each duplicate group - confirm this is intended.
    scores(ic) = sorted_scores(:);

    %% Find new boundary from the top fraction of all samples
    n_samples = numel(sorted_scores);
    % Clamp to [1, n_samples]: a tiny percentage must still select one sample,
    % and a percentage above 1 must not index past the end.
    index = min(max(round(percentage * n_samples), 1), n_samples);
    population_top = sorted_population(1:index, :);
    % Reduce along dimension 1 explicitly; without it a single selected row
    % would collapse to a scalar instead of per-variable bounds.
    ub = max(population_top, [], 1);
    lb = min(population_top, [], 1);

    %% Select the Pop2 best unique individuals
    if Pop2 > n_unique
        error('data_process:notEnoughIndividuals', ...
            'Requested Pop2 = %d individuals but only %d unique individuals are available.', ...
            Pop2, n_unique);
    end
    [~, temp_indices] = sort(scores);
    POPULATION = population(temp_indices(1:Pop2), :);

    %% Re-order for training
    rng('shuffle'); % Use the current time as the random seed
    % Use the row count, not length(): length() returns the largest dimension
    % and over-counts when there are fewer individuals than variables.
    randomOrder = randperm(n_unique);
    label = scores(randomOrder);
    feature = population(randomOrder, :);
end