|
|
|
#include "JZsdkLib.h"
|
|
|
|
#include "JZsdkLib.h"
|
|
|
|
#include "version_choose.h"
|
|
|
|
|
|
|
|
//main函数参数一是原始pcm文件名,参数二是去噪后的pcm文件名
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef SPEEX_STATUS_ON
|
|
|
|
|
|
|
|
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "speex/speex_echo.h"        // Speex echo-cancellation API
#include "speex/speex_preprocess.h"  // Speex preprocessor API
|
|
|
|
|
|
|
|
/*
|
|
|
|
graph LR
|
|
|
|
|
|
|
|
A[原始语音信号] --> B[预处理]
|
|
|
|
B --> C[语言检测]
|
|
|
|
C -->|英语| D[英语参数配置]
|
|
|
|
C -->|汉语| E[汉语参数配置]
|
|
|
|
C -->|法语| F[法语参数配置]
|
|
|
|
D --> G[Speex编码器]
|
|
|
|
E --> H[Speex编码器]
|
|
|
|
F --> I[Speex编码器]
|
|
|
|
G --> J[输出编码数据]
|
|
|
|
H --> J
|
|
|
|
I --> J
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define TAIL_TIME 500 //回声尾长,单位毫秒
|
|
|
|
|
|
|
|
typedef struct JZ_SpeexInfo{
|
|
|
|
|
|
|
|
//处理的音频样本长度, 一般对应10~20ms的音频数据,太小会增加计算开销,太大会增加处理延迟
|
|
|
|
int DealSampleLen;
|
|
|
|
|
|
|
|
/*
|
|
|
|
回声尾长 表示需要消除的回声持续时间(以样本数计) 建议值对应100-500毫秒的音频数据
|
|
|
|
对于8kHz采样率:800-4000个样本
|
|
|
|
对于16kHz采样率:1600-8000个样本
|
|
|
|
*/
|
|
|
|
int TailLen;
|
|
|
|
|
|
|
|
//音频采样率
|
|
|
|
#include "speex/speex_echo.h"
|
|
|
|
#include "speex/speex_preprocess.h"
|
|
|
|
|
|
|
|
// ========== 可配置参数 ==========
|
|
|
|
#define TAIL_MS 300 // 回声尾长(毫秒)
|
|
|
|
#define FRAME_SAMPLES 640 // 每帧样本数 640个short
|
|
|
|
#define SAMPLE_RATE 16000 // 采样率
|
|
|
|
#define PLAYBACK_DELAY_FRAMES 1 // 播放延迟(帧数),1 表示用上一帧作为参考
|
|
|
|
// =================================
|
|
|
|
|
|
|
|
typedef struct JZ_SpeexInfo {
|
|
|
|
int SampleRate;
|
|
|
|
int FrameSize;
|
|
|
|
int TailLen; // 回声尾长(样本数)
|
|
|
|
int PlaybackDelaySamples; // 播放延迟(样本数)
|
|
|
|
|
|
|
|
// Speex回声消除状态
|
|
|
|
SpeexEchoState *EchoState;
|
|
|
|
SpeexEchoState* EchoState;
|
|
|
|
SpeexPreprocessState* PreprocessState;
|
|
|
|
|
|
|
|
// Speex预处理状态
|
|
|
|
SpeexPreprocessState *PreprocessState;
|
|
|
|
// 环形缓冲区,存储历史输出帧(即播放过的数据)
|
|
|
|
short* HistoryBuffer;
|
|
|
|
int HistorySize; // 缓冲区总长度(样本数)
|
|
|
|
int WritePos; // 下一个写入位置
|
|
|
|
int TotalWritten; // 累计写入样本数(用于判断缓冲区是否足够)
|
|
|
|
|
|
|
|
// 存储回声消除的样本
|
|
|
|
short *EchoBuf;
|
|
|
|
int EchoBufLen;
|
|
|
|
SpeexPreprocessState* DenoiseOnlyState;
|
|
|
|
int DenoiseOnlyFlag; // 是否启用独立降噪模式
|
|
|
|
|
|
|
|
//标志位
|
|
|
|
int Flag;
|
|
|
|
} JZ_SpeexInfo;
|
|
|
|
|
|
|
|
// File-local Speex state used by the functions below; every field starts zeroed.
static JZ_SpeexInfo g_SpeexInfo = { 0 };
|
|
|
|
|
|
|
|
// 初始化历史缓冲区
|
|
|
|
static int InitHistoryBuffer(int size_samples) {
|
|
|
|
if (g_SpeexInfo.HistoryBuffer) {
|
|
|
|
free(g_SpeexInfo.HistoryBuffer);
|
|
|
|
g_SpeexInfo.HistoryBuffer = NULL;
|
|
|
|
}
|
|
|
|
g_SpeexInfo.HistoryBuffer = (short*)malloc(size_samples * sizeof(short));
|
|
|
|
if (!g_SpeexInfo.HistoryBuffer) return -1;
|
|
|
|
memset(g_SpeexInfo.HistoryBuffer, 0, size_samples * sizeof(short));
|
|
|
|
g_SpeexInfo.HistorySize = size_samples;
|
|
|
|
g_SpeexInfo.WritePos = 0;
|
|
|
|
g_SpeexInfo.TotalWritten = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// 写入一帧到历史缓冲区(播放过的帧)
|
|
|
|
static void WriteHistoryFrame(short* frame) {
|
|
|
|
int fs = g_SpeexInfo.FrameSize;
|
|
|
|
int hist_size = g_SpeexInfo.HistorySize;
|
|
|
|
int write_pos = g_SpeexInfo.WritePos;
|
|
|
|
|
|
|
|
if (write_pos + fs <= hist_size) {
|
|
|
|
memcpy(g_SpeexInfo.HistoryBuffer + write_pos, frame, fs * sizeof(short));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
int first_part = hist_size - write_pos;
|
|
|
|
memcpy(g_SpeexInfo.HistoryBuffer + write_pos, frame, first_part * sizeof(short));
|
|
|
|
memcpy(g_SpeexInfo.HistoryBuffer, frame + first_part, (fs - first_part) * sizeof(short));
|
|
|
|
}
|
|
|
|
g_SpeexInfo.WritePos = (write_pos + fs) % hist_size;
|
|
|
|
g_SpeexInfo.TotalWritten += fs;
|
|
|
|
}
|
|
|
|
|
|
|
|
// 从历史缓冲区读取参考帧(对齐到当前麦克风时间)
|
|
|
|
static int ReadRefFrame(short* out_ref) {
|
|
|
|
int fs = g_SpeexInfo.FrameSize;
|
|
|
|
int hist_size = g_SpeexInfo.HistorySize;
|
|
|
|
int write_pos = g_SpeexInfo.WritePos;
|
|
|
|
int delay_samples = g_SpeexInfo.PlaybackDelaySamples;
|
|
|
|
|
|
|
|
int read_pos = write_pos - delay_samples - fs;
|
|
|
|
if (read_pos < 0) read_pos += hist_size;
|
|
|
|
|
|
|
|
}JZ_SpeexInfo;
|
|
|
|
if (g_SpeexInfo.TotalWritten < delay_samples + fs) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static JZ_SpeexInfo g_SpeexInfo = {0};
|
|
|
|
if (read_pos + fs <= hist_size) {
|
|
|
|
memcpy(out_ref, g_SpeexInfo.HistoryBuffer + read_pos, fs * sizeof(short));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
int first_part = hist_size - read_pos;
|
|
|
|
memcpy(out_ref, g_SpeexInfo.HistoryBuffer + read_pos, first_part * sizeof(short));
|
|
|
|
memcpy(out_ref + first_part, g_SpeexInfo.HistoryBuffer, (fs - first_part) * sizeof(short));
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
T_JZsdkReturnCode Speex_Deinit()
|
|
|
|
{
|
|
|
|
if (g_SpeexInfo.Flag == JZ_FLAGCODE_ON)
|
|
|
|
{
|
|
|
|
speex_echo_state_destroy(g_SpeexInfo.EchoState); // 释放回声消除状态
|
|
|
|
speex_preprocess_state_destroy(g_SpeexInfo.PreprocessState); // 释放预处理状态
|
|
|
|
if (g_SpeexInfo.EchoState)
|
|
|
|
{
|
|
|
|
speex_echo_state_destroy(g_SpeexInfo.EchoState);
|
|
|
|
g_SpeexInfo.EchoState = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(&(g_SpeexInfo.EchoBuf), 0, sizeof(g_SpeexInfo.EchoBuf));
|
|
|
|
g_SpeexInfo.EchoBufLen = 0;
|
|
|
|
if (g_SpeexInfo.PreprocessState)
|
|
|
|
{
|
|
|
|
speex_preprocess_state_destroy(g_SpeexInfo.PreprocessState);
|
|
|
|
g_SpeexInfo.PreprocessState = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (g_SpeexInfo.EchoBuf != NULL)
|
|
|
|
if (g_SpeexInfo.HistoryBuffer)
|
|
|
|
{
|
|
|
|
free(g_SpeexInfo.EchoBuf);
|
|
|
|
g_SpeexInfo.EchoBuf = NULL;
|
|
|
|
free(g_SpeexInfo.HistoryBuffer);
|
|
|
|
g_SpeexInfo.HistoryBuffer = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(&g_SpeexInfo, 0, sizeof(g_SpeexInfo));
|
|
|
|
g_SpeexInfo.Flag = JZ_FLAGCODE_OFF;
|
|
|
|
}
|
|
|
|
|
|
|
|
JZSDK_LOG_DEBUG("Speex_Deinit success\n");
|
|
|
|
//降噪注销
|
|
|
|
if (g_SpeexInfo.DenoiseOnlyState)
|
|
|
|
{
|
|
|
|
speex_preprocess_state_destroy(g_SpeexInfo.DenoiseOnlyState);
|
|
|
|
g_SpeexInfo.DenoiseOnlyState = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
JZSDK_LOG_DEBUG("Speex_Deinit success\n");
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
T_JZsdkReturnCode Speex_Init(int SampleRate)
|
|
|
|
// 初始化
|
|
|
|
// sample_rate: 采样率(如 16000)
|
|
|
|
T_JZsdkReturnCode Speex_Init(int sample_rate)
|
|
|
|
{
|
|
|
|
//检查speex的参数
|
|
|
|
if (g_SpeexInfo.Flag == JZ_FLAGCODE_ON)
|
|
|
|
{
|
|
|
|
Speex_Deinit();
|
|
|
|
}
|
|
|
|
|
|
|
|
g_SpeexInfo.SampleRate = SampleRate;
|
|
|
|
int frame_samples = FRAME_SAMPLES;
|
|
|
|
int playback_delay_frames = PLAYBACK_DELAY_FRAMES;
|
|
|
|
|
|
|
|
//计算长度
|
|
|
|
g_SpeexInfo.TailLen = SampleRate * TAIL_TIME / 1000 ; //可以×1.2作为余量
|
|
|
|
g_SpeexInfo.DealSampleLen = 640; //16000 * time / 1000 //目前是因为程序写死了80 后面可以改
|
|
|
|
g_SpeexInfo.SampleRate = sample_rate;
|
|
|
|
g_SpeexInfo.FrameSize = frame_samples;
|
|
|
|
g_SpeexInfo.TailLen = sample_rate * TAIL_MS / 1000;
|
|
|
|
g_SpeexInfo.PlaybackDelaySamples = playback_delay_frames * frame_samples;
|
|
|
|
|
|
|
|
// 初始化回声消除状态
|
|
|
|
g_SpeexInfo.EchoState = speex_echo_state_init(g_SpeexInfo.DealSampleLen, g_SpeexInfo.TailLen);
|
|
|
|
g_SpeexInfo.PreprocessState = speex_preprocess_state_init(g_SpeexInfo.DealSampleLen, g_SpeexInfo.SampleRate); // 初始化预处理状态
|
|
|
|
int hist_size = g_SpeexInfo.TailLen + g_SpeexInfo.PlaybackDelaySamples + frame_samples * 2;
|
|
|
|
|
|
|
|
//设置采样率
|
|
|
|
speex_echo_ctl(g_SpeexInfo.EchoState, SPEEX_ECHO_SET_SAMPLING_RATE, &(g_SpeexInfo.SampleRate));
|
|
|
|
g_SpeexInfo.EchoState = speex_echo_state_init(frame_samples, g_SpeexInfo.TailLen);
|
|
|
|
if (!g_SpeexInfo.EchoState) {
|
|
|
|
JZSDK_LOG_DEBUG("Speex_Init: speex_echo_state_init failed\n");
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
|
|
|
|
}
|
|
|
|
speex_echo_ctl(g_SpeexInfo.EchoState, SPEEX_ECHO_SET_SAMPLING_RATE, &sample_rate);
|
|
|
|
|
|
|
|
g_SpeexInfo.PreprocessState = speex_preprocess_state_init(frame_samples, sample_rate);
|
|
|
|
if (!g_SpeexInfo.PreprocessState)
|
|
|
|
{
|
|
|
|
speex_echo_state_destroy(g_SpeexInfo.EchoState);
|
|
|
|
JZSDK_LOG_DEBUG("Speex_Init: speex_preprocess_state_init failed\n");
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
//将预处理状态与回声消除状态关联
|
|
|
|
speex_preprocess_ctl(g_SpeexInfo.PreprocessState, SPEEX_PREPROCESS_SET_ECHO_STATE, g_SpeexInfo.EchoState);
|
|
|
|
|
|
|
|
//注册预处理数据数组
|
|
|
|
g_SpeexInfo.EchoBuf = (short *)malloc(g_SpeexInfo.TailLen * sizeof(short));
|
|
|
|
if (InitHistoryBuffer(hist_size) != 0) {
|
|
|
|
speex_echo_state_destroy(g_SpeexInfo.EchoState);
|
|
|
|
speex_preprocess_state_destroy(g_SpeexInfo.PreprocessState);
|
|
|
|
JZSDK_LOG_DEBUG("Speex_Init: history buffer allocation failed\n");
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
g_SpeexInfo.Flag = JZ_FLAGCODE_ON;
|
|
|
|
JZSDK_LOG_DEBUG("Speex_Init success: sr=%d, fs=%d, tail=%d, delay=%d samples, hist=%d\n",
|
|
|
|
sample_rate, frame_samples, g_SpeexInfo.TailLen,
|
|
|
|
g_SpeexInfo.PlaybackDelaySamples, hist_size);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
JZSDK_LOG_DEBUG("Speex_Init success\n");
|
|
|
|
/***********************************
|
|
|
|
|
|
|
|
降噪配置
|
|
|
|
|
|
|
|
|
|
|
|
*************************************/
|
|
|
|
g_SpeexInfo.DenoiseOnlyState = speex_preprocess_state_init(FRAME_SAMPLES, sample_rate);
|
|
|
|
if (!g_SpeexInfo.DenoiseOnlyState) {
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
// 在 Speex_Init 中,创建 DenoiseOnlyState 后添加:
|
|
|
|
int vad = 0;
|
|
|
|
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_VAD, &vad);
|
|
|
|
int agc = 0;
|
|
|
|
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_AGC, &agc);
|
|
|
|
int denoise = 1; // 保持开启
|
|
|
|
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_DENOISE, &denoise);
|
|
|
|
// 可选:关闭降噪的自动增益补偿
|
|
|
|
int noise_suppress = 0; // 或者尝试 1,2...
|
|
|
|
speex_preprocess_ctl(g_SpeexInfo.DenoiseOnlyState, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &noise_suppress);
|
|
|
|
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
T_JZsdkReturnCode Speex_DealData(short *InData, short *OutData, int frame_size)
|
|
|
|
|
|
|
|
// 独立降噪处理(char* 版本,原地处理)
|
|
|
|
T_JZsdkReturnCode Speex_DenoiseOnly_Process(short* mic, short* out)
|
|
|
|
{
|
|
|
|
if (g_SpeexInfo.Flag == JZ_FLAGCODE_OFF)
|
|
|
|
if (!g_SpeexInfo.DenoiseOnlyState)
|
|
|
|
{
|
|
|
|
if (out != mic) memcpy(out, mic, FRAME_SAMPLES * sizeof(short));
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
//填充回声消除的样本
|
|
|
|
if (g_SpeexInfo.EchoBufLen < g_SpeexInfo.TailLen)
|
|
|
|
{
|
|
|
|
memcpy(&(g_SpeexInfo.EchoBuf[g_SpeexInfo.EchoBufLen]), InData, frame_size * sizeof(short));
|
|
|
|
g_SpeexInfo.EchoBufLen += frame_size;
|
|
|
|
}
|
|
|
|
//如果里面有完整数据
|
|
|
|
else if (g_SpeexInfo.EchoBufLen == g_SpeexInfo.DealSampleLen)
|
|
|
|
{
|
|
|
|
//对数据进行位移
|
|
|
|
memmove(g_SpeexInfo.EchoBuf, &(g_SpeexInfo.EchoBuf[frame_size]), (g_SpeexInfo.EchoBufLen - frame_size) * sizeof(short));
|
|
|
|
if (out != mic) memcpy(out, mic, FRAME_SAMPLES * sizeof(short));
|
|
|
|
|
|
|
|
//将新的数据填充到回声消除的样本中
|
|
|
|
memcpy(&(g_SpeexInfo.EchoBuf[g_SpeexInfo.EchoBufLen - frame_size]), InData, frame_size * sizeof(short));
|
|
|
|
}
|
|
|
|
//如果里面的数据超了
|
|
|
|
else if (g_SpeexInfo.EchoBufLen > g_SpeexInfo.TailLen)
|
|
|
|
speex_preprocess_run(g_SpeexInfo.DenoiseOnlyState, out);
|
|
|
|
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
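// Process one frame of microphone data (echo cancellation + preprocessing).
// mic: raw PCM samples captured from the microphone (FRAME_SAMPLES = 640 shorts, i.e. 1280 bytes)
// out: processed PCM samples (same length; may share memory with mic)
// Note: out is also stored in the history ring and serves as the reference (playback) signal for later frames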
|
|
|
|
T_JZsdkReturnCode Speex_ProcessMic(short* mic, short* out)
|
|
|
|
{
|
|
|
|
if (g_SpeexInfo.Flag == JZ_FLAGCODE_OFF)
|
|
|
|
{
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
//如果没有填充完回声数组
|
|
|
|
if (g_SpeexInfo.EchoBufLen < g_SpeexInfo.TailLen)
|
|
|
|
short ref_frame[FRAME_SAMPLES]; // 用于存放参考帧
|
|
|
|
if (ReadRefFrame(ref_frame) != 0)
|
|
|
|
{
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_FAILURE;
|
|
|
|
// 历史数据不足,直接拷贝输出
|
|
|
|
if (out != mic) memcpy(out, mic, g_SpeexInfo.FrameSize * sizeof(short));
|
|
|
|
WriteHistoryFrame(out);
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
// // 执行回声消除
|
|
|
|
// speex_echo_cancellation(g_SpeexInfo.EchoState, InData, g_SpeexInfo.EchoBuf, OutData);
|
|
|
|
// 执行回声消除
|
|
|
|
speex_echo_cancellation(g_SpeexInfo.EchoState, mic, ref_frame, out);
|
|
|
|
|
|
|
|
// 执行后处理(噪声抑制等)
|
|
|
|
speex_preprocess_run(g_SpeexInfo.PreprocessState, out);
|
|
|
|
|
|
|
|
// // 执行预处理(如噪声抑制等)
|
|
|
|
// speex_preprocess_run(g_SpeexInfo.PreprocessState, OutData);
|
|
|
|
// 将处理后的帧写入历史缓冲区(因为它即将被播放)
|
|
|
|
WriteHistoryFrame(out);
|
|
|
|
|
|
|
|
return JZ_ERROR_SYSTEM_MODULE_CODE_SUCCESS;
|
|
|
|
}
|
...
|
...
|
|