// Speex.c - Speex-based echo cancellation and noise suppression wrapper.
#include "JZsdkLib.h"
#include "version_choose.h"
#ifdef SPEEX_STATUS_ON
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "speex/speex_echo.h"
#include "speex/speex_preprocess.h"
// ========== Configurable parameters ==========
#define TAIL_MS 300 // echo tail length (milliseconds)
#define FRAME_SAMPLES 640 // samples per frame: 640 shorts
#define SAMPLE_RATE 16000 // sample rate (not referenced in the code visible here; Speex_Init takes the rate as an argument)
#define PLAYBACK_DELAY_FRAMES 1 // playback delay (in frames); per the original note, 1 means the previous frame is used as the reference
// =================================
// Module-wide state for the Speex echo-cancellation / denoise wrapper.
typedef struct JZ_SpeexInfo {
int SampleRate; // sample rate passed to Speex_Init
int FrameSize; // samples per frame
int TailLen; // echo tail length (in samples)
int PlaybackDelaySamples; // playback delay (in samples)
SpeexEchoState* EchoState; // echo canceller handle
SpeexPreprocessState* PreprocessState; // preprocessor that Speex_Init links to EchoState
// Ring buffer holding past output frames (i.e. data that has been played)
short* HistoryBuffer;
int HistorySize; // total buffer length (in samples)
int WritePos; // next write position
int TotalWritten; // tracks how many samples have been written (used to decide whether enough history is buffered)
SpeexPreprocessState* DenoiseOnlyState; // separate preprocessor used by Speex_DenoiseOnly_Process
int DenoiseOnlyFlag; // whether standalone denoise mode is enabled (not referenced in the code visible here)
int Flag; // set to JZ_FLAGCODE_ON by Speex_Init, JZ_FLAGCODE_OFF by Speex_Deinit
} JZ_SpeexInfo;
// Single zero-initialized instance shared by every function in this file.
static JZ_SpeexInfo g_SpeexInfo = { 0 };
// Allocate (or re-allocate) the zero-filled playback-history ring buffer.
//
// size_samples: ring capacity in samples; must be greater than zero.
//
// Returns 0 on success, -1 on an invalid size or allocation failure.
// Any previous buffer is released first. On failure no buffer is left behind
// and the ring bookkeeping (size, write position, written count) is zeroed,
// so stale sizes can never be paired with a NULL buffer.
static int InitHistoryBuffer(int size_samples) {
    free(g_SpeexInfo.HistoryBuffer);  // free(NULL) is a no-op
    g_SpeexInfo.HistoryBuffer = NULL;
    g_SpeexInfo.HistorySize = 0;
    g_SpeexInfo.WritePos = 0;
    g_SpeexInfo.TotalWritten = 0;

    // A negative count would be converted to a huge size_t by the allocator.
    if (size_samples <= 0) {
        return -1;
    }

    // calloc zero-fills the ring and checks count * size for overflow.
    short* ring = calloc((size_t)size_samples, sizeof *ring);
    if (!ring) {
        return -1;
    }

    g_SpeexInfo.HistoryBuffer = ring;
    g_SpeexInfo.HistorySize = size_samples;
    return 0;
}
// Append one frame (FrameSize samples) of played-back audio to the history
// ring buffer, wrapping around the end of the ring when necessary.
//
// frame: FrameSize samples to store; not modified.
//
// The call is a no-op when the ring is missing, the frame pointer is NULL, or
// the ring cannot hold a whole frame (which would otherwise overrun the ring
// or divide by zero in the wrap computation).
static void WriteHistoryFrame(const short* frame) {
    const int fs = g_SpeexInfo.FrameSize;
    const int hist_size = g_SpeexInfo.HistorySize;
    const int write_pos = g_SpeexInfo.WritePos;

    if (!g_SpeexInfo.HistoryBuffer || !frame || fs <= 0 || hist_size < fs) {
        return;
    }

    if (write_pos + fs <= hist_size) {
        memcpy(g_SpeexInfo.HistoryBuffer + write_pos, frame, fs * sizeof(short));
    }
    else {
        // Frame straddles the end of the ring: fill the tail, then continue at index 0.
        int first_part = hist_size - write_pos;
        memcpy(g_SpeexInfo.HistoryBuffer + write_pos, frame, first_part * sizeof(short));
        memcpy(g_SpeexInfo.HistoryBuffer, frame + first_part, (fs - first_part) * sizeof(short));
    }
    g_SpeexInfo.WritePos = (write_pos + fs) % hist_size;

    // TotalWritten is only compared against a small threshold, so saturate it
    // instead of letting it overflow: signed overflow is undefined behaviour,
    // and a wrapped negative count would make the history look empty again
    // after long uptime.
    if (g_SpeexInfo.TotalWritten <= INT_MAX - fs) {
        g_SpeexInfo.TotalWritten += fs;
    }
}
// Fetch the reference frame from the history ring buffer (the original note
// describes it as aligned to the current microphone time).
//
// The frame starts PlaybackDelaySamples + FrameSize samples behind the current
// write position, wrapping around the ring when needed.
//
// out_ref: receives FrameSize samples.
// Returns 0 on success, -1 when fewer than PlaybackDelaySamples + FrameSize
// samples have been written so far (out_ref is left untouched).
static int ReadRefFrame(short* out_ref) {
    const int frame_len = g_SpeexInfo.FrameSize;
    const int ring_len = g_SpeexInfo.HistorySize;
    const int lag = g_SpeexInfo.PlaybackDelaySamples;

    // Not enough history yet.
    if (g_SpeexInfo.TotalWritten < lag + frame_len) {
        return -1;
    }

    int start = g_SpeexInfo.WritePos - lag - frame_len;
    if (start < 0) {
        start += ring_len;
    }

    const short* ring = g_SpeexInfo.HistoryBuffer;
    const int until_end = ring_len - start;  // samples available before the ring wraps

    if (until_end >= frame_len) {
        memcpy(out_ref, ring + start, frame_len * sizeof(short));
        return 0;
    }

    // Frame straddles the end of the ring: copy the tail, then the head.
    memcpy(out_ref, ring + start, until_end * sizeof(short));
    memcpy(out_ref + until_end, ring, (frame_len - until_end) * sizeof(short));
    return 0;
}
// Release every Speex resource held in g_SpeexInfo.
//
// The echo canceller, its preprocessor and the history buffer are released
// only when Flag is JZ_FLAGCODE_ON; the state struct is then cleared and Flag
// is set to JZ_FLAGCODE_OFF. The denoise-only preprocessor is released
// whenever it exists.
//
// Always returns JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS.
T_JZsdkReturnCode Speex_Deinit()
{
    // Denoise teardown must happen BEFORE the memset below: the memset wipes
    // the whole state struct, and destroying afterwards would only ever see a
    // NULL pointer and leak the preprocessor state.
    if (g_SpeexInfo.DenoiseOnlyState)
    {
        speex_preprocess_state_destroy(g_SpeexInfo.DenoiseOnlyState);
        g_SpeexInfo.DenoiseOnlyState = NULL;
    }

    if (g_SpeexInfo.Flag == JZ_FLAGCODE_ON)
    {
        if (g_SpeexInfo.EchoState)
        {
            speex_echo_state_destroy(g_SpeexInfo.EchoState);
            g_SpeexInfo.EchoState = NULL;
        }

        if (g_SpeexInfo.PreprocessState)
        {
            speex_preprocess_state_destroy(g_SpeexInfo.PreprocessState);
            g_SpeexInfo.PreprocessState = NULL;
        }

        free(g_SpeexInfo.HistoryBuffer);  // free(NULL) is a no-op
        g_SpeexInfo.HistoryBuffer = NULL;

        memset(&g_SpeexInfo, 0, sizeof(g_SpeexInfo));
        g_SpeexInfo.Flag = JZ_FLAGCODE_OFF;
    }

    JZSDK_LOG_DEBUG("Speex_Deinit success\n");
    return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
}
// 初始化
// sample_rate: 采样率(如 16000)
T_JZsdkReturnCode Speex_Init(int sample_rate)
{
if (g_SpeexInfo.Flag == JZ_FLAGCODE_ON)
{
Speex_Deinit();
}
int frame_samples = FRAME_SAMPLES;
int playback_delay_frames = PLAYBACK_DELAY_FRAMES;
g_SpeexInfo.SampleRate = sample_rate;
g_SpeexInfo.FrameSize = frame_samples;
g_SpeexInfo.TailLen = sample_rate * TAIL_MS / 1000;
g_SpeexInfo.PlaybackDelaySamples = playback_delay_frames * frame_samples;
int hist_size = g_SpeexInfo.TailLen + g_SpeexInfo.PlaybackDelaySamples + frame_samples * 2;
g_SpeexInfo.EchoState = speex_echo_state_init(frame_samples, g_SpeexInfo.TailLen);
if (!g_SpeexInfo.EchoState) {
JZSDK_LOG_DEBUG("Speex_Init: speex_echo_state_init failed\n");
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
}
speex_echo_ctl(g_SpeexInfo.EchoState, SPEEX_ECHO_SET_SAMPLING_RATE, &sample_rate);
g_SpeexInfo.PreprocessState = speex_preprocess_state_init(frame_samples, sample_rate);
if (!g_SpeexInfo.PreprocessState)
{
speex_echo_state_destroy(g_SpeexInfo.EchoState);
JZSDK_LOG_DEBUG("Speex_Init: speex_preprocess_state_init failed\n");
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
}
speex_preprocess_ctl(g_SpeexInfo.PreprocessState, SPEEX_PREPROCESS_SET_ECHO_STATE, g_SpeexInfo.EchoState);
if (InitHistoryBuffer(hist_size) != 0) {
speex_echo_state_destroy(g_SpeexInfo.EchoState);
speex_preprocess_state_destroy(g_SpeexInfo.PreprocessState);
JZSDK_LOG_DEBUG("Speex_Init: history buffer allocation failed\n");
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
}
g_SpeexInfo.Flag = JZ_FLAGCODE_ON;
JZSDK_LOG_DEBUG("Speex_Init success: sr=%d, fs=%d, tail=%d, delay=%d samples, hist=%d\n",
sample_rate, frame_samples, g_SpeexInfo.TailLen,
g_SpeexInfo.PlaybackDelaySamples, hist_size);
/***********************************
降噪配置
*************************************/
g_SpeexInfo.DenoiseOnlyState = speex_preprocess_state_init(FRAME_SAMPLES, sample_rate);
if (!g_SpeexInfo.DenoiseOnlyState) {
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
}
// 在 Speex_Init 中,创建 DenoiseOnlyState 后添加:
int vad = 0;
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_VAD, &vad);
int agc = 0;
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_AGC, &agc);
int denoise = 1; // 保持开启
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_DENOISE, &denoise);
// 可选:关闭降噪的自动增益补偿
int noise_suppress = 0; // 或者尝试 1,2...
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &noise_suppress);
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
}
// Standalone noise suppression on one frame of 16-bit PCM.
//
// mic: input frame, FRAME_SAMPLES samples.
// out: output frame, FRAME_SAMPLES samples; may be the same pointer as mic
//      for in-place processing.
//
// Returns JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS after running the denoiser.
// Returns JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE when either pointer is NULL
// (nothing is touched) or when the denoise state does not exist (in which
// case the input is still passed through to out unchanged).
T_JZsdkReturnCode Speex_DenoiseOnly_Process(short* mic, short* out)
{
    if (!mic || !out)
    {
        return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
    }

    // The preprocessor works in place, so stage the input in out first.
    // This also provides the pass-through when no denoise state is available.
    if (out != mic)
    {
        memcpy(out, mic, FRAME_SAMPLES * sizeof(short));
    }

    if (!g_SpeexInfo.DenoiseOnlyState)
    {
        return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
    }

    speex_preprocess_run(g_SpeexInfo.DenoiseOnlyState, out);
    return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
}
// 处理麦克风数据(char* 版本)
// mic: 麦克风采集的原始 PCM 字节流(长度 = frame_samples * sizeof(short) = 640 字节)
// out: 处理后的干净 PCM 字节流(长度相同,可与 mic 共用内存)
// 注意:out 同时会被保存到历史缓冲区,作为下一帧的参考信号(播放数据)
T_JZsdkReturnCode Speex_ProcessMic(short* mic, short* out)
{
if (g_SpeexInfo.Flag == JZ_FLAGCODE_OFF)
{
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
}
short ref_frame[FRAME_SAMPLES]; // 用于存放参考帧
if (ReadRefFrame(ref_frame) != 0)
{
// 历史数据不足,直接拷贝输出
if (out != mic) memcpy(out, mic, g_SpeexInfo.FrameSize * sizeof(short));
WriteHistoryFrame(out);
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
}
// 执行回声消除
speex_echo_cancellation(g_SpeexInfo.EchoState, mic, ref_frame, out);
// 执行后处理(噪声抑制等)
speex_preprocess_run(g_SpeexInfo.PreprocessState, out);
// 将处理后的帧写入历史缓冲区(因为它即将被播放)
WriteHistoryFrame(out);
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
}
#endif // SPEEX_STATUS_ON