-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_prep.R
123 lines (105 loc) · 3.32 KB
/
data_prep.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#' Tidy MP3 or Wave
#'
#' Apply transformations for modelling ready audio files.
#'
#' The transformations are:
#'
#' - mp3 to wav
#' - normalization
#' - stereo to mono
#'
#' @param audio_files character vector with the full path for the mp3 files.
#' @param dest_dir optional atomic character indicating the path where to store
#' the files. Default is a tempdir(). Set NULL if you want the dest dir be the
#' same as origin (warning: it will overwrite the original wave files!)
#' @param sample_rate integer. Sample rate.
#' @param bits integer.
#'
#' @return a character vector of the same size of audio_files with the file names of the transformed wavs.
#'
#' @export
#'
#' @examples
#'
#' mp3_dir <- system.file("mp3_sample", package = "mestrado")
#' mp3_files <- list.files(mp3_dir, full.names = TRUE)
#'
#' wav_files <- tidy_audio(mp3_files)
tidy_audio_dir <- function(audio_files, dest_dir = tempdir(), sample_rate = 16000, bits = 16, parallel = 1) {
# if dir is set, make sure it exists
if(!is.null(dest_dir)) {
dest_dir <- paste0(dest_dir, "/")
if(!dir.exists(dest_dir)) {
dir.create(dest_dir)
}
}
# fix windows '\\' issue
dest_dir <- gsub("\\\\", "/", dest_dir)
new_audio_files <- character(length(audio_files))
pb <- progress::progress_bar$new(total = length(audio_files))
for(i in seq_along(audio_files)) {
pb$tick()
audio_file <- audio_files[i]
file_ext <- tools::file_ext(audio_file)
# load audio file
if(file_ext == "mp3"){
wav <- tuneR::readMP3(audio_file)
} else {
wav <- tuneR::readWave(audio_file)
}
# mp3 to wav in the name file
new_audio_file <- gsub("mp3$", "wav", audio_file)
# update name for the right dest_dir to store
if(!is.null(dest_dir)) new_audio_file <- gsub("^.*/", dest_dir, new_audio_file)
wav <- tidy_audio(wav, sample_rate = sample_rate, bits = bits)
# write wave in the disk
tuneR::writeWave(wav, new_audio_file)
new_audio_files[i] <- new_audio_file
}
return(new_audio_files)
}
#' Tidy MP3 or Wave
#'
#' Apply transformations on an wav object for modelling purposes.
#'
#' The transformations are:
#'
#' - mp3 to wav
#' - normalization
#' - stereo to mono
#'
#' @param wav Wave object. Usualy returned by \code{\link[tuneR]{readMP3}}/\code{\link[tuneR]{readWave}}.
#'
#' @return a transformed Wave object.
#'
#' @export
#'
#' @examples
#'
#' library(tuneR)
#' mp3_dir <- system.file("mp3_sample", package = "mestrado")
#' mp3_files <- list.files(mp3_dir, full.names = TRUE)
#' wav <- readMP3(mp3_files[1])
#'
#' transformed_wav <- tidy_audio(wav)
tidy_audio <- function(wav, sample_rate = 16000, bits = 16) {
# normalize
wav <- tuneR::normalize(wav, unit = as.character(bits))
# downsample
if([email protected] > sample_rate) {
wav <- tuneR::downsample(wav, sample_rate)
}
wav@left <- as.numeric(wav@left) %>% tidyr::replace_na(0)
wav@right <- as.numeric(wav@right) %>% tidyr::replace_na(0)
if(wav@stereo) wav <- wav %>% tuneR::mono(which = "both")
wav@left <- as.integer(wav@left)
wav@right <- as.integer(wav@right)
# to mono
wav <- tuneR::mono(wav)
wav
}
#' see this issue https://github.com/rstudio/tensorflow/issues/402
#' sudo apt install sox
identity_resampling <- function(orig, dest = orig) {
system(glue::glue("sox {orig} -r 16000 {dest}"))
}