前面分享过一个算法《音频增益响度分析 ReplayGain 附完整 C 代码示例》,
主要用于评估一定长度音频的音量强度,
而分析之后, 接下来的需求通常就是做音频增益, 例如提高音量之类的处理.
不过在项目实测的时候, 其实真的很难定标准,
到底在什么样的环境下, 要增大音量, 还是降低.
在通讯行业一般的做法就是采用静音检测,
一旦检测为静音或者噪音, 则不做处理, 反之通过一定的策略进行处理.
这里就涉及到两个算法, 一个是静音检测, 一个是音频增益.
增益其实没什么好说的, 类似于数据归一化拉伸的做法.
静音检测 在 WebRTC 中 是采用计算 GMM (Gaussian Mixture Model, 高斯混合模型) 进行特征提取的.
在很长一段时间里面, 音频特征 有 3 个主要的方法,
GMM, Spectrogram (声谱图), MFCC 即 Mel-Frequency Cepstral Coefficients (Mel 频率倒谱系数).
恕我直言, GMM 提取的特征, 其鲁棒性 不如后两者.
也不多做介绍, 感兴趣的同学, 翻翻 维基百科 , 补补课.
当然在实际使用算法时, 会由此延伸出来一些小技巧.
例如, 用静音检测 来做音频裁剪, 或者搭配音频增益做一些音频增强之类的操作.
自动增益在 WebRTC 源代码文件是: analog_agc.c 和 digital_agc.c
静音检测 源代码文件是: webrtc_vad.c
这个命名, 有一定的历史原因了.
经过梳理后,
增益算法为 agc.c agc.h
静音检测为 vad.c vad.h
增益算法的完整示例代码:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
- // 采用 https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
- #define DR_WAV_IMPLEMENTATION
- #include "dr_wav.h"
- #include "agc.h"
- #ifndef nullptr
- #define nullptr 0
- #endif
- #ifndef MIN
- #define MIN(A, B) ((A) <(B) ? (A) : (B))
- #endif
- // 写 wav 文件
- void wavWrite_int16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount) {
- drwav_data_format format = {};
- format.container = drwav_container_riff; // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
- format.format = DR_WAVE_FORMAT_PCM; // <-- Any of the DR_WAVE_FORMAT_* codes.
- format.channels = 1;
- format.sampleRate = (drwav_uint32) sampleRate;
- format.bitsPerSample = 16;
- drwav *pWav = drwav_open_file_write(filename, &format);
- if (pWav) {
- drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer);
- drwav_uninit(pWav);
- if (samplesWritten != totalSampleCount) {
- fprintf(stderr, "ERROR\n");
- exit(1);
- }
- }
- }
/**
 * Decode a WAV file into 16-bit PCM samples. Only single-channel files
 * are accepted.
 *
 * @param filename          Path of the WAV file to read.
 * @param sampleRate        Out: sample rate of the file, or 0 on failure.
 * @param totalSampleCount  Out: number of decoded samples, or 0 on failure.
 * @return Buffer of decoded samples, or NULL if the file could not be read
 *         or is not mono. The buffer is allocated by dr_wav; this function
 *         itself releases rejected buffers with drwav_free().
 */
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    /* Initialised so the channel check can never read an indeterminate value. */
    unsigned int channels = 0;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == NULL) {
        printf("读取 wav 文件失败.");
        /* Return immediately: there is nothing to free and nothing to inspect. */
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    /* Only mono audio is processed. */
    if (channels != 1) {
        drwav_free(buffer);
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    return buffer;
}
/**
 * Split a path into drive, directory, base name and extension.
 *
 * Any output pointer may be NULL, in which case that component is skipped.
 * No output sizes are passed in, so every non-NULL buffer must be able to
 * hold strlen(path) + 1 bytes (drv needs 3 bytes).
 *
 * @param path  NUL-terminated input path; both '\\' and '/' separate components.
 * @param drv   Out: "X:" when path starts with a drive prefix, otherwise "".
 * @param dir   Out: everything after the drive up to and including the last separator.
 * @param name  Out: last component without its extension.
 * @param ext   Out: extension including the leading '.', or the remaining tail of the path.
 */
void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
    const char *end;
    const char *p;
    const char *s;

    if (path[0] && path[1] == ':') {
        if (drv) {
            drv[0] = path[0];
            drv[1] = path[1];
            drv[2] = '\0';
        }
        /* Skip the drive prefix even when the caller does not want it;
         * otherwise the ':' stops the scan below after one character and
         * dir/name/ext are all computed from the wrong position. */
        path += 2;
    } else if (drv) {
        *drv = '\0';
    }

    /* The path proper ends at the terminator or at the first ':'. */
    end = path;
    while (*end && *end != ':') {
        end++;
    }

    /* Walk backwards through the last component looking for the extension dot. */
    p = end;
    while (p > path) {
        --p;
        if (*p == '\\' || *p == '/') {
            break;
        }
        if (*p == '.') {
            end = p;
            break;
        }
    }

    if (ext) {
        /* Copies from the dot (or from end) through the terminating NUL. */
        for (s = end; (*ext = *s++);) {
            ext++;
        }
    }

    /* Find the start of the last component: one past the last separator. */
    p = end;
    while (p > path) {
        --p;
        if (*p == '\\' || *p == '/') {
            p++;
            break;
        }
    }

    if (name) {
        for (s = p; s < end;) {
            *name++ = *s++;
        }
        *name = '\0';
    }

    if (dir) {
        for (s = path; s < p;) {
            *dir++ = *s++;
        }
        *dir = '\0';
    }
}
- int agcProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t agcMode) {
- if (buffer == nullptr) return -1;
- if (samplesCount == 0) return -1;
- WebRtcAgcConfig agcConfig;
- agcConfig.compressionGaindB = 9; // default 9 dB
- agcConfig.limiterEnable = 1; // default kAgcTrue (on)
- agcConfig.targetLevelDbfs = 3; // default 3 (-3 dBOv)
- int minLevel = 0;
- int maxLevel = 255;
- size_t samples = MIN(160, sampleRate / 100);
- if (samples == 0) return -1;
- const int maxSamples = 320;
- int16_t *input = buffer;
- size_t nTotal = (samplesCount / samples);
- void *agcInst = WebRtcAgc_Create();
- if (agcInst == NULL) return -1;
- int status = WebRtcAgc_Init(agcInst, minLevel, maxLevel, agcMode, sampleRate);
- if (status != 0) {
- printf("WebRtcAgc_Init fail\n");
- WebRtcAgc_Free(agcInst);
- return -1;
- }
- status = WebRtcAgc_set_config(agcInst, agcConfig);
- if (status != 0) {
- printf("WebRtcAgc_set_config fail\n");
- WebRtcAgc_Free(agcInst);
- return -1;
- }
- size_t num_bands = 1;
- int inMicLevel, outMicLevel = -1;
- int16_t out_buffer[maxSamples];
- int16_t *out16 = out_buffer;
- uint8_t saturationWarning = 1; // 是否有溢出发生, 增益放大以后的最大值超过了 65536
- int16_t echo = 0; // 增益放大是否考虑回声影响
- for (int i = 0; i < nTotal; i++) {
- inMicLevel = 0;
- int nAgcRet = WebRtcAgc_Process(agcInst, (const int16_t *const *) &input, num_bands, samples,
- (int16_t *const *) &out16, inMicLevel, &outMicLevel, echo,
- &saturationWarning);
- if (nAgcRet != 0) {
- printf("failed in WebRtcAgc_Process\n");
- WebRtcAgc_Free(agcInst);
- return -1;
- }
- memcpy(input, out_buffer, samples * sizeof(int16_t));
- input += samples;
- }
- WebRtcAgc_Free(agcInst);
- return 1;
- }
- void auto_gain(char *in_file, char *out_file) {
- // 音频采样率
- uint32_t sampleRate = 0;
- // 总音频采样数
- uint64_t inSampleCount = 0;
- int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
- // 如果加载成功
- if (inBuffer != nullptr) {
- // kAgcModeAdaptiveAnalog 模拟音量调节
- // kAgcModeAdaptiveDigital 自适应增益
- // kAgcModeFixedDigital 固定增益
agcProcess(inBuffer, sampleRate, inSampleCount, kAgcModeAdaptiveDigital);
- wavWrite_int16(out_file, inBuffer, sampleRate, inSampleCount);
- free(inBuffer);
- }
- }
/**
 * Entry point of the AGC example: takes the input WAV path as argv[1] and
 * writes the processed audio next to it as "<name>_out<ext>".
 *
 * @return 0 after processing, -1 when no input path is given or the path is
 *         too long for the local buffers.
 */
int main(int argc, char *argv[]) {
    printf("WebRTC Automatic Gain Control\n");
    printf("博客: http://cpuimage.cnblogs.com/\n");
    /* Was "\ n", which is not a valid escape sequence and printed no newline. */
    printf("音频自动增益\n");
    if (argc < 2)
        return -1;

    char *in_file = argv[1];
    char drive[3];
    char dir[256];
    char fname[256];
    char ext[256];
    char out_file[1024];

    /* splitpath() copies without bounds. Every component is at most as long
     * as the whole path, so bounding the path bounds each component. */
    if (strlen(in_file) >= sizeof(dir)) {
        fprintf(stderr, "input path too long (max %u characters)\n", (unsigned) (sizeof(dir) - 1));
        return -1;
    }

    splitpath(in_file, drive, dir, fname, ext);
    snprintf(out_file, sizeof(out_file), "%s%s%s_out%s", drive, dir, fname, ext);
    auto_gain(in_file, out_file);

    printf("按任意键退出程序 \n");
    getchar();
    return 0;
}
静音检测完整示例代码:
- #include <stdio.h>
- #include <stdlib.h>
- #include <stdint.h>
- // 采用 https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
- #define DR_WAV_IMPLEMENTATION
- #include "dr_wav.h"
- #include "vad.h"
- #ifndef nullptr
- #define nullptr 0
- #endif
- #ifndef MIN
- #define MIN(A, B) ((A) <(B) ? (A) : (B))
- #endif
- #ifndef MAX
- #define MAX(A, B) ((A)> (B) ? (A) : (B))
- #endif
/**
 * Decode a WAV file into 16-bit PCM samples. Only single-channel files
 * are accepted.
 *
 * @param filename          Path of the WAV file to read.
 * @param sampleRate        Out: sample rate of the file, or 0 on failure.
 * @param totalSampleCount  Out: number of decoded samples, or 0 on failure.
 * @return Buffer of decoded samples, or NULL if the file could not be read
 *         or is not mono. The buffer is allocated by dr_wav; this function
 *         itself releases rejected buffers with drwav_free().
 */
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    /* Initialised so the channel check can never read an indeterminate value. */
    unsigned int channels = 0;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == NULL) {
        printf("读取 wav 文件失败.");
        /* Return immediately: there is nothing to free and nothing to inspect. */
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    /* Only mono audio is processed. */
    if (channels != 1) {
        drwav_free(buffer);
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    return buffer;
}
/**
 * Run WebRTC voice activity detection over a mono 16-bit PCM buffer and
 * print one result per frame to stdout.
 *
 * @param buffer         Samples to analyse.
 * @param sampleRate     Sample rate of buffer in Hz (kValidRates: 8000, 16000, 32000, 48000).
 * @param samplesCount   Number of samples in buffer.
 * @param vad_mode       Mode passed to WebRtcVad_set_mode.
 * @param per_ms_frames  Requested frame length in milliseconds; clamped to
 *                       10..30 and rounded down to 10, 20 or 30.
 * @return 1 on success, -1 on invalid input or when any WebRtcVad_* call fails.
 *
 * Samples after the last complete frame are not analysed.
 */
int vadProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t vad_mode, int per_ms_frames) {
    if (buffer == NULL) return -1;
    if (samplesCount == 0) return -1;

    /* kValidRates : 8000, 16000, 32000, 48000 */
    /* 10, 20 or 30 ms frames */
    per_ms_frames = MAX(MIN(30, per_ms_frames), 10);
    /* Clamping alone still lets values such as 15 or 25 through; snap down
     * to a multiple of 10 so the frame length is one of the listed sizes. */
    per_ms_frames -= per_ms_frames % 10;

    /* 64-bit intermediate so the product cannot wrap for large sample rates. */
    size_t samples = (size_t) ((uint64_t) sampleRate * (uint64_t) per_ms_frames / 1000);
    if (samples == 0) return -1;

    int16_t *input = buffer;
    const size_t nTotal = samplesCount / samples;
    int result = -1;

    void *vadInst = WebRtcVad_Create();
    if (vadInst == NULL) return -1;

    if (WebRtcVad_Init(vadInst) != 0) {
        printf("WebRtcVad_Init fail\n");
        goto cleanup;
    }
    if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
        printf("WebRtcVad_set_mode fail\n");
        goto cleanup;
    }

    printf("Activity : \n");
    /* size_t index: nTotal is a size_t, so an int counter would be compared
     * as unsigned and could overflow on very long inputs. */
    for (size_t i = 0; i < nTotal; i++) {
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
        if (nVadRet == -1) {
            printf("failed in WebRtcVad_Process\n");
            goto cleanup;
        }
        /* output result */
        printf("%d \t", nVadRet);
        input += samples;
    }
    printf("\n");
    result = 1;

cleanup:
    WebRtcVad_Free(vadInst);
    return result;
}
/**
 * Load a mono WAV file and print its voice-activity result per frame.
 *
 * @param in_file  Path of the WAV file to analyse.
 */
void vad(char *in_file) {
    uint32_t sampleRate = 0;    /* audio sample rate */
    uint64_t inSampleCount = 0; /* total number of samples */
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
    if (inBuffer == NULL) {
        return;
    }

    /* Aggressiveness mode (0, 1, 2, or 3) */
    int16_t mode = 1;
    int per_ms = 30;
    if (vadProcess(inBuffer, sampleRate, (size_t) inSampleCount, mode, per_ms) != 1) {
        /* Some failure paths inside vadProcess() return without printing anything. */
        fprintf(stderr, "vadProcess failed for '%s'\n", in_file);
    }

    /* The buffer comes from dr_wav, so release it with dr_wav's deallocator
     * (the same one wavRead_int16 uses) rather than plain free(). */
    drwav_free(inBuffer);
}
/**
 * Entry point of the VAD example: takes the input WAV path as argv[1] and
 * prints the per-frame detection result.
 *
 * @return 0 after processing, -1 when no input path is given.
 */
int main(int argc, char *argv[]) {
    printf("WebRTC Voice Activity Detector\n");
    printf("博客: http://cpuimage.cnblogs.com/\n");
    /* Was "\ n", which is not a valid escape sequence and printed no newline. */
    printf("静音检测\n");
    if (argc < 2)
        return -1;

    char *in_file = argv[1];
    vad(in_file);

    printf("按任意键退出程序 \n");
    getchar();
    return 0;
}
自动增益项目地址: https://github.com/cpuimage/WebRTC_AGC
具体流程为:
加载 wav(拖放 wav 文件到可执行文件上)-> 增益处理 -> 保存为 "原文件名_out.wav" 文件
静音检测项目地址: https://github.com/cpuimage/WebRTC_VAD
具体流程为:
加载 wav(拖放 wav 文件到可执行文件上)-> 输出静音检测结果
备注: 1 为非静音, 0 为静音
该注意的地方和参数, 见代码注释.
用 cmake 即可进行编译示例代码, 详情见 CMakeLists.txt.
来源: https://www.cnblogs.com/cpuimage/p/8908551.html