-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudio.py
160 lines (126 loc) · 4.85 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import os
import wave
import appbuilder
import numpy as np
import pyaudio
from .api_key import APPBUILDER_TOKEN
#
# Configure the API key: export the token imported from .api_key through the
# APPBUILDER_TOKEN environment variable (presumably read by the appbuilder SDK
# when the component below is constructed - confirm against the SDK docs).
os.environ["APPBUILDER_TOKEN"] = APPBUILDER_TOKEN
asr = appbuilder.ASR() # speech recognition component, shared by speech_recognition()
# Print the available audio devices at import time so the microphone index can be determined
import sounddevice as sd
print(sd.query_devices())
print(f"识别到的录音设备")
def record(MIC_INDEX=0, DURATION=5):
    '''
    Record a fixed-length clip from a microphone with the ``arecord`` tool.

    The clip is written to 'temp/speech_record.wav' as mono, 16000 Hz audio.
    The command is run through ``sudo``, as in the original implementation.

    MIC_INDEX: ALSA device id substituted into "plughw:<id>"; use
               ``arecord -l`` to find the id of the microphone.
    DURATION: recording length in seconds.
    '''
    # Local import keeps this function self-contained; the module's import
    # block does not provide subprocess.
    import subprocess

    output_path = 'temp/speech_record.wav'
    # arecord does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # An argument list (no shell) keeps MIC_INDEX / DURATION from being
    # interpreted as shell syntax.
    command = [
        'sudo', 'arecord',
        '-D', 'plughw:{}'.format(MIC_INDEX),
        '-f', 'dat',
        '-c', '1',
        '-r', '16000',
        '-d', str(DURATION),
        output_path,
    ]

    print('开始 {} 秒录音'.format(DURATION))
    try:
        exit_code = subprocess.run(command).returncode
    except OSError as err:
        # e.g. sudo is not installed; report instead of crashing, matching
        # the best-effort behaviour of the previous os.system call.
        print('录音失败: {}'.format(err))
        return
    if exit_code != 0:
        print('录音失败, arecord 退出码 {}'.format(exit_code))
        return
    print('录音结束')
def record_auto(MIC_INDEX=1):
    '''
    Record from the microphone with volume-triggered start and stop, then
    save the captured audio to './speech_record.wav'.

    Audio is read in chunks of CHUNK samples. Recording is considered started
    at the first chunk whose peak sample value exceeds QUIET_DB, and it ends
    once the level has stayed below that threshold for more than
    delay_time * 15 chunks. Capture stops unconditionally after 150 chunks.

    The saved clip begins two chunks before the trigger chunk (clamped to the
    start of the capture). If the threshold is never exceeded, everything
    captured before the timeout is saved.

    MIC_INDEX: input device index passed to PyAudio.
    '''
    CHUNK = 1024  # samples read from the stream per loop iteration
    RATE = 16000  # sampling rate in Hz
    QUIET_DB = 200  # threshold on a chunk's peak int16 sample value (raw amplitude, not decibels)
    delay_time = 1  # quiet period, nominally in seconds, after which recording stops
    CHUNKS_PER_SECOND = 15  # roughly RATE / CHUNK; converts delay_time into a chunk count
    MAX_CHUNKS = 150  # hard limit on captured chunks before timing out
    PRE_ROLL = 2  # chunks kept before the trigger chunk
    FORMAT = pyaudio.paInt16
    CHANNELS = 1  # number of input channels

    frames = []  # every chunk read so far
    flag = False  # True once recording has been triggered
    quiet_flag = False  # True while the level is below the threshold
    temp_time = 0  # index of the current chunk
    last_ok_time = 0  # chunk index at which the quiet timer was last reset
    START_TIME = 0  # chunk index at which recording was triggered
    END_TIME = 0  # chunk index at which recording ended (exclusive)

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK,
                        input_device_index=MIC_INDEX
                        )
        try:
            print('可以说话啦!')
            while True:
                # Read the next chunk and measure its peak sample value.
                data = stream.read(CHUNK, exception_on_overflow=False)
                frames.append(data)
                temp_volume = np.max(np.frombuffer(data, dtype=np.short))

                if temp_volume > QUIET_DB and not flag:
                    print("音量高于阈值,开始录音")
                    flag = True
                    START_TIME = temp_time
                    last_ok_time = temp_time

                if flag:  # already recording
                    if temp_volume < QUIET_DB and not quiet_flag:
                        print("录音中,当前音量低于阈值")
                        quiet_flag = True
                        last_ok_time = temp_time

                    if temp_volume > QUIET_DB:
                        # Level is back above the threshold: reset the quiet timer.
                        quiet_flag = False
                        last_ok_time = temp_time

                    if temp_time > last_ok_time + delay_time * CHUNKS_PER_SECOND and quiet_flag:
                        print("音量低于阈值{:.2f}秒后,检测当前音量".format(delay_time))
                        if quiet_flag and temp_volume < QUIET_DB:
                            print("当前音量仍然小于阈值,录音结束")
                            END_TIME = temp_time
                            break
                        else:
                            print("当前音量重新高于阈值,继续录音中")
                            quiet_flag = False
                            last_ok_time = temp_time

                temp_time += 1
                if temp_time > MAX_CHUNKS:  # timed out: stop regardless of level
                    END_TIME = temp_time
                    print('超时,录音结束')
                    break
        finally:
            # Release the stream even if reading raised.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Export the selected chunks as a wav file. The start index is clamped:
    # a negative index would slice from the END of the list and drop nearly
    # all of the audio when recording triggers in the first two chunks.
    output_path = './speech_record.wav'
    first_chunk = max(START_TIME - PRE_ROLL, 0)
    with wave.open(output_path, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames[first_chunk:END_TIME]))
    print('保存录音文件', output_path)
#
#
def speech_recognition(audio_path='./speech_record.wav'):
    '''
    Transcribe a wav file with the AppBuilder-SDK speech recognition component.

    The audio frames are read from the file and sent through the module-level
    ``asr`` component together with the file's own sampling rate.

    audio_path: path of the wav file to transcribe.

    Returns the first entry of the 'result' list in the response content.
    '''
    print('开始语音识别')
    # Load the audio frames and the sampling rate they were recorded at.
    with wave.open(audio_path, 'rb') as wav_file:
        framerate = wav_file.getframerate()
        frames = wav_file.readframes(wav_file.getnframes())

    # Send the request, reporting the file's actual rate rather than a
    # hard-coded 16000 so the metadata always matches the audio data.
    content_data = {"audio_format": "wav", "raw_audio": frames, "rate": framerate}
    message = appbuilder.Message(content_data)
    speech_result = asr.run(message).content['result'][0]
    print('语音识别结果:', speech_result)
    return speech_result
if __name__ == "__main__":
    # Manual demo when run as a script: capture one utterance with the
    # volume-triggered recorder, then transcribe the saved wav file.
    record_auto()
    speech_recognition()