// ShengShengBuXi/ShengShengBuXi/Services/AudioProcessingService.cs
// 2025-03-28 15:55:39 +08:00
//
// 844 lines, 26 KiB, C#
//
// Note from the repository viewer: this file contains Unicode characters that might be
// confused with other characters. If this is intentional, the warning can be safely ignored.

using Microsoft.Extensions.Logging;
using NAudio.Dsp;
using NAudio.Utils;
using NAudio.Wave;
using ShengShengBuXi.Models;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace ShengShengBuXi.Services;
/// <summary>
/// 音频处理服务实现
/// </summary>
public class AudioProcessingService : IAudioProcessingService
{
// Logger used for every diagnostic message emitted by this service.
private readonly ILogger<AudioProcessingService> _logger;
// Provides the current configuration (recordings folder, file name format, cleanup settings).
private readonly IConfigurationService _configService;
// Open WAV writers keyed by client ID; an entry exists while that client's recording is active.
private readonly ConcurrentDictionary<string, WaveFileWriter> _activeRecordings = new ConcurrentDictionary<string, WaveFileWriter>();
// Path of the recording file most recently started for each client ID.
private readonly ConcurrentDictionary<string, string> _recordingFilePaths = new ConcurrentDictionary<string, string>();
// Set once Dispose has run so that repeated Dispose calls return immediately.
private bool _isDisposed;
/// <summary>
/// Raised when a new speech-to-text result is available.
/// </summary>
public event EventHandler<SpeechToTextResult> SpeechToTextResultReceived;
/// <summary>
/// Raised when audio data has been saved to a file; the event argument is the file path.
/// </summary>
public event EventHandler<string> AudioSavedToFile;
/// <summary>
/// Creates the audio processing service.
/// </summary>
/// <param name="logger">Logger used for diagnostics; must not be null.</param>
/// <param name="configService">Configuration service; must not be null.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public AudioProcessingService(ILogger<AudioProcessingService> logger, IConfigurationService configService)
{
    if (logger == null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    if (configService == null)
    {
        throw new ArgumentNullException(nameof(configService));
    }

    _logger = logger;
    _configService = configService;
}
/// <summary>
/// Prepares the service for use: makes sure the recordings folder exists and, when
/// automatic cleanup is enabled in the configuration, removes old recordings.
/// </summary>
/// <returns>
/// <c>true</c> when initialization completed; <c>false</c> when any step threw
/// (the exception is logged, not rethrown).
/// </returns>
public bool Initialize()
{
    try
    {
        _logger.LogInformation("初始化音频处理服务");

        // The recordings folder is resolved relative to the current working directory.
        var recordingConfig = _configService.CurrentConfig.Recording;
        var recordingsFolder = Path.Combine(Directory.GetCurrentDirectory(), recordingConfig.RecordingsFolder);
        if (!Directory.Exists(recordingsFolder))
        {
            Directory.CreateDirectory(recordingsFolder);
            _logger.LogInformation($"已创建录音文件夹: {recordingsFolder}");
        }

        // Optional housekeeping, controlled by configuration.
        if (recordingConfig.AutoCleanupOldRecordings)
        {
            CleanupOldRecordings();
        }

        return true;
    }
    catch (Exception ex)
    {
        // Hand the exception object to the logger so its type and stack trace are kept.
        _logger.LogError(ex, $"初始化音频处理服务失败: {ex.Message}");
        return false;
    }
}
/// <summary>
/// Appends one chunk of received audio to the client's active recording.
/// Chunks with a sample rate above 16 kHz are downsampled to 16 kHz (treated as
/// 16-bit samples) before they are written.
/// </summary>
/// <param name="audioData">Audio bytes to append; null or empty input is ignored.</param>
/// <param name="sampleRate">Sample rate of <paramref name="audioData"/> in Hz.</param>
/// <param name="channels">Number of channels in <paramref name="audioData"/>.</param>
/// <param name="clientId">ID of the client whose recording receives the data.</param>
/// <param name="token">Cancellation token handed to the background write.</param>
/// <returns>A task that completes once the chunk has been written and flushed, or dropped.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="clientId"/> is null or empty (surfaced through the returned task).
/// </exception>
/// <remarks>Failures while resampling or writing are logged and not rethrown.</remarks>
public async Task ProcessAudioDataAsync(byte[] audioData, int sampleRate, int channels, string clientId, CancellationToken token = default)
{
    if (audioData == null || audioData.Length == 0)
    {
        return;
    }

    if (string.IsNullOrEmpty(clientId))
    {
        throw new ArgumentNullException(nameof(clientId));
    }

    try
    {
        // Look the session up first: chunks for clients without an active recording
        // are dropped, so there is no point in resampling them beforehand.
        if (!_activeRecordings.TryGetValue(clientId, out var writer))
        {
            _logger.LogWarning($"客户端没有活动的录音会话: {clientId}");
            return;
        }

        // Anything above 16 kHz is reduced to 16 kHz before it reaches the file.
        if (sampleRate > 16000)
        {
            _logger.LogInformation($"原始采样率为{sampleRate}Hz进行降采样到16000Hz");
            audioData = ResampleAudio(audioData, sampleRate, 16000, 16, channels);
        }

        // Write and flush on a thread-pool thread so the caller is not blocked by file I/O.
        await Task.Run(() =>
        {
            writer.Write(audioData, 0, audioData.Length);
            writer.Flush();
        }, token);
    }
    catch (Exception ex)
    {
        // Hand the exception object to the logger so its type and stack trace are kept.
        _logger.LogError(ex, $"处理音频数据失败: {ex.Message}");
    }
}
/// <summary>
/// Converts interleaved PCM audio to another sample rate by picking, for every output
/// frame, the input frame at the proportional position (no filtering or interpolation).
/// </summary>
/// <param name="audioData">Source audio bytes.</param>
/// <param name="originalSampleRate">Sample rate of the source in Hz.</param>
/// <param name="targetSampleRate">Desired sample rate in Hz.</param>
/// <param name="bitsPerSample">Bit depth of a single sample.</param>
/// <param name="channels">Number of interleaved channels.</param>
/// <returns>
/// The converted audio; the original array when both rates are equal or when the
/// conversion throws (the failure is logged).
/// </returns>
private byte[] ResampleAudio(byte[] audioData, int originalSampleRate, int targetSampleRate, int bitsPerSample, int channels)
{
    if (originalSampleRate == targetSampleRate)
    {
        return audioData;
    }

    try
    {
        // How many source frames correspond to one output frame.
        double step = (double)originalSampleRate / targetSampleRate;

        int sampleBytes = bitsPerSample / 8;
        int sourceFrames = audioData.Length / (sampleBytes * channels);
        int targetFrames = (int)(sourceFrames / step);
        byte[] resampled = new byte[targetFrames * sampleBytes * channels];

        for (int frame = 0; frame < targetFrames; frame++)
        {
            // Clamp so rounding can never select a frame past the end of the source.
            int sourceFrame = Math.Min((int)(frame * step), sourceFrames - 1);

            for (int ch = 0; ch < channels; ch++)
            {
                // Copy all bytes of this channel's sample in one go.
                int sourceOffset = (sourceFrame * channels + ch) * sampleBytes;
                int targetOffset = (frame * channels + ch) * sampleBytes;
                Buffer.BlockCopy(audioData, sourceOffset, resampled, targetOffset, sampleBytes);
            }
        }

        _logger.LogInformation($"音频降采样完成: {audioData.Length}字节 -> {resampled.Length}字节");
        return resampled;
    }
    catch (Exception ex)
    {
        _logger.LogError($"音频降采样失败: {ex.Message}");
        // Fall back to the untouched input when the conversion fails.
        return audioData;
    }
}
/// <summary>
/// Applies noise reduction to raw 16-bit PCM audio: a noise gate followed by a
/// high-pass filter.
/// </summary>
/// <param name="audioData">Raw PCM bytes (read as 16-bit samples).</param>
/// <param name="sampleRate">Sample rate of <paramref name="audioData"/> in Hz.</param>
/// <param name="channels">Number of channels in <paramref name="audioData"/>.</param>
/// <param name="noiseThreshold">Noise gate threshold.</param>
/// <param name="attackSeconds">Noise gate attack time in seconds.</param>
/// <param name="releaseSeconds">Noise gate release time in seconds.</param>
/// <param name="highPassCutoff">High-pass filter cutoff frequency in Hz.</param>
/// <param name="q">Filter Q value.</param>
/// <returns>
/// The bytes produced by <c>WaveFileWriter.WriteWavFileToStream</c> for the processed
/// audio, i.e. a WAV file image rather than bare samples.
/// </returns>
public byte[] ApplyNoiseReduction(byte[] audioData, int sampleRate = 16000, int channels = 1, float noiseThreshold = 0.015f, float attackSeconds = 0.01f, float releaseSeconds = 0.1f, int highPassCutoff = 80, float q = 1.0f)
{
    // Forward the caller's format as well; it used to be dropped here, so every
    // input was processed as 16 kHz mono regardless of the arguments.
    return ApplyNoiseReductionInternal(audioData, noiseThreshold, attackSeconds, releaseSeconds, highPassCutoff, q, sampleRate, channels);
}

/// <summary>
/// Runs the gate + high-pass chain over raw 16-bit PCM audio.
/// </summary>
/// <param name="audioData">Raw PCM bytes (read as 16-bit samples).</param>
/// <param name="noiseThreshold">Noise gate threshold.</param>
/// <param name="attackSeconds">Noise gate attack time in seconds.</param>
/// <param name="releaseSeconds">Noise gate release time in seconds.</param>
/// <param name="highPassCutoff">High-pass filter cutoff frequency in Hz.</param>
/// <param name="q">Filter Q value.</param>
/// <param name="sampleRate">Sample rate of the input in Hz.</param>
/// <param name="channels">Number of channels in the input.</param>
/// <returns>The processed audio as written by <c>WaveFileWriter.WriteWavFileToStream</c>.</returns>
private byte[] ApplyNoiseReductionInternal(
    byte[] audioData,
    float noiseThreshold = 0.02f,
    float attackSeconds = 0.01f,
    float releaseSeconds = 0.1f,
    int highPassCutoff = 80,
    float q = 1.0f,
    int sampleRate = 16000,
    int channels = 1)
{
    // 1. Wrap the raw bytes in a wave stream that carries the caller's format.
    using (var inputStream = new MemoryStream(audioData))
    using (var waveStream = new RawSourceWaveStream(inputStream, new WaveFormat(sampleRate, 16, channels)))
    using (var outputStream = new MemoryStream())
    {
        // 2. Work on floating-point samples.
        var sampleProvider = waveStream.ToSampleProvider();

        // 3. Noise gate.
        var noiseGate = new NoiseGateSampleProvider(sampleProvider)
        {
            Threshold = noiseThreshold,
            AttackSeconds = attackSeconds,
            ReleaseSeconds = releaseSeconds
        };

        // 4. High-pass filter to remove low-frequency noise.
        // NOTE(review): gate and filter each keep a single state that is fed the
        // interleaved samples; for channels > 1 that state is shared between
        // channels - confirm whether multi-channel input is actually used.
        var highPassFilter = new BiQuadFilterSampleProvider(noiseGate);
        highPassFilter.Filter = BiQuadFilter.HighPassFilter(
            sampleProvider.WaveFormat.SampleRate,
            highPassCutoff,
            q);

        // 5. Convert back to 16-bit and collect the written bytes.
        WaveFileWriter.WriteWavFileToStream(outputStream, highPassFilter.ToWaveProvider16());
        return outputStream.ToArray();
    }
}
/// <summary>
/// Starts a new recording for a client: creates the WAV file and registers its writer
/// as the client's active recording. An already active recording for the same client
/// is ended first.
/// </summary>
/// <param name="clientId">ID of the client starting the stream.</param>
/// <param name="sampleRate">Sample rate of the audio the client will send, in Hz.</param>
/// <param name="channels">Number of channels the client will send.</param>
/// <param name="token">Cancellation token observed while waiting for a previous recording to end.</param>
/// <returns>A completed task.</returns>
/// <exception cref="ArgumentNullException"><paramref name="clientId"/> is null or empty.</exception>
/// <remarks>Failures while creating the file are logged and not rethrown.</remarks>
public Task StartAudioStreamAsync(string clientId, int sampleRate, int channels, CancellationToken token = default)
{
    if (string.IsNullOrEmpty(clientId))
    {
        throw new ArgumentNullException(nameof(clientId));
    }

    // Only one recording per client: finish the previous one before starting anew.
    if (_activeRecordings.ContainsKey(clientId))
    {
        _logger.LogWarning($"客户端已有活动的录音会话,先结束它: {clientId}");
        EndAudioStreamAsync(clientId).Wait(token);
    }

    try
    {
        // Resolve (and create if needed) the recordings folder.
        var config = _configService.CurrentConfig.Recording;
        var recordingsFolder = Path.Combine(Directory.GetCurrentDirectory(), config.RecordingsFolder);
        if (!Directory.Exists(recordingsFolder))
        {
            Directory.CreateDirectory(recordingsFolder);
        }

        // The file name is the configured format applied to the current local time.
        var fileName = string.Format(config.FileNameFormat, DateTime.Now);
        var filePath = Path.Combine(recordingsFolder, fileName);

        // ProcessAudioDataAsync downsamples everything above 16 kHz to 16 kHz before
        // writing, so the header must advertise the rate of the data that is actually
        // stored; otherwise the file would play back at the wrong speed.
        const int MaxRecordingSampleRate = 16000;
        var recordedSampleRate = Math.Min(sampleRate, MaxRecordingSampleRate);
        var waveFormat = new WaveFormat(recordedSampleRate, channels);

        var writer = new WaveFileWriter(filePath, waveFormat);

        if (_activeRecordings.TryAdd(clientId, writer))
        {
            _recordingFilePaths[clientId] = filePath;
            _logger.LogInformation($"开始音频流处理: {clientId}, 文件: {filePath}");
        }
        else
        {
            // Another recording was registered for this client in the meantime.
            _logger.LogError($"无法开始音频流处理,添加到活动录音失败: {clientId}");
            writer.Dispose();
        }
    }
    catch (Exception ex)
    {
        // Hand the exception object to the logger so its type and stack trace are kept.
        _logger.LogError(ex, $"开始音频流处理失败: {ex.Message}");
    }

    return Task.CompletedTask;
}
/// <summary>
/// Ends the client's active recording: unregisters and closes its writer and, when a
/// file path is known for the client, raises the audio-saved notification.
/// </summary>
/// <param name="clientId">ID of the client whose stream ends.</param>
/// <param name="token">Cancellation token (not used by this implementation).</param>
/// <returns>A completed task.</returns>
/// <exception cref="ArgumentNullException"><paramref name="clientId"/> is null or empty.</exception>
/// <remarks>Failures while closing the writer are logged and not rethrown.</remarks>
public Task EndAudioStreamAsync(string clientId, CancellationToken token = default)
{
    if (string.IsNullOrEmpty(clientId))
    {
        throw new ArgumentNullException(nameof(clientId));
    }

    try
    {
        // Nothing to do when the client has no active recording.
        if (!_activeRecordings.TryRemove(clientId, out var activeWriter))
        {
            return Task.CompletedTask;
        }

        _recordingFilePaths.TryGetValue(clientId, out var savedPath);

        // Close and release the writer.
        activeWriter.Close();
        activeWriter.Dispose();
        _logger.LogInformation($"结束音频流: {clientId}");

        // Announce the saved file only when its path is known.
        if (!string.IsNullOrEmpty(savedPath))
        {
            OnAudioSavedToFile(savedPath);
        }
    }
    catch (Exception ex)
    {
        _logger.LogError($"结束音频流处理失败: {ex.Message}");
    }

    return Task.CompletedTask;
}
/// <summary>
/// Looks up the recording file path stored for a client.
/// </summary>
/// <param name="clientId">ID of the client.</param>
/// <returns>
/// The stored file path, or <c>null</c> when <paramref name="clientId"/> is null or
/// empty or no path is stored for it.
/// </returns>
public string GetRecordingFilePath(string clientId)
{
    if (!string.IsNullOrEmpty(clientId) && _recordingFilePaths.TryGetValue(clientId, out var recordedPath))
    {
        return recordedPath;
    }

    return null;
}
/// <summary>
/// Opens a read-only stream over the recording file stored for a client.
/// </summary>
/// <param name="clientId">ID of the client.</param>
/// <returns>
/// A new read-only <see cref="FileStream"/> over the file, or <c>null</c> when no path
/// is stored for the client, the file does not exist, or opening it fails (logged).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="clientId"/> is null or empty.</exception>
public Stream GetCurrentAudioStream(string clientId)
{
    if (string.IsNullOrEmpty(clientId))
    {
        throw new ArgumentNullException(nameof(clientId));
    }

    var pathKnown = _recordingFilePaths.TryGetValue(clientId, out var recordingPath);
    if (!pathKnown || !File.Exists(recordingPath))
    {
        return null;
    }

    try
    {
        // The recording may still be in progress, so open read-only and allow other
        // handles to keep reading and writing the file.
        return new FileStream(recordingPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
    }
    catch (Exception ex)
    {
        _logger.LogError($"获取当前音频流失败: {ex.Message}");
        return null;
    }
}
/// <summary>
/// Lists the most recently created recording files.
/// </summary>
/// <param name="count">Maximum number of files to return.</param>
/// <returns>
/// File names (without directory) of the newest <c>*.wav</c> files in the recordings
/// folder, newest first by creation time; an empty array when the folder did not exist
/// (it is created) or when listing fails (logged).
/// </returns>
public string[] GetRecentRecordings(int count = 10)
{
    try
    {
        var recordingsFolder = Path.Combine(Directory.GetCurrentDirectory(), _configService.CurrentConfig.Recording.RecordingsFolder);
        _logger.LogInformation($"查找录音文件夹: {recordingsFolder}");

        if (!Directory.Exists(recordingsFolder))
        {
            // Create the missing folder so later calls find it; there is nothing to list yet.
            _logger.LogWarning($"录音文件夹不存在: {recordingsFolder}");
            Directory.CreateDirectory(recordingsFolder);
            _logger.LogInformation($"已创建录音文件夹: {recordingsFolder}");
            return Array.Empty<string>();
        }

        // All WAV files, newest creation time first, limited to the requested count.
        var newestFirst = Directory.GetFiles(recordingsFolder, "*.wav")
            .Select(path => new FileInfo(path))
            .OrderByDescending(info => info.CreationTime)
            .Take(count)
            .ToArray();
        _logger.LogInformation($"找到 {newestFirst.Length} 个录音文件");

        // Callers get bare file names, not full paths.
        var fileNames = Array.ConvertAll(newestFirst, info => info.Name);
        _logger.LogInformation($"返回录音文件: {string.Join(", ", fileNames)}");
        return fileNames;
    }
    catch (Exception ex)
    {
        _logger.LogError($"获取最近录音失败: {ex.Message}");
        return Array.Empty<string>();
    }
}
/// <summary>
/// Deletes <c>*.wav</c> files in the recordings folder whose last write time is older
/// than the configured number of days to keep. A file that cannot be deleted is
/// logged and skipped; the remaining files are still processed.
/// </summary>
private void CleanupOldRecordings()
{
    try
    {
        var config = _configService.CurrentConfig.Recording;
        var recordingsFolder = Path.Combine(Directory.GetCurrentDirectory(), config.RecordingsFolder);
        if (!Directory.Exists(recordingsFolder))
        {
            return;
        }

        // Everything last written before this instant is considered expired.
        var threshold = DateTime.Now.AddDays(-config.KeepRecordingsDays);
        var oldFiles = Directory.GetFiles(recordingsFolder, "*.wav")
            .Select(f => new FileInfo(f))
            .Where(f => f.LastWriteTime < threshold)
            .ToArray();

        // Count successful deletions only, so the summary below reports what was
        // really removed rather than the number of candidates.
        var deletedCount = 0;
        foreach (var file in oldFiles)
        {
            try
            {
                file.Delete();
                deletedCount++;
                _logger.LogInformation($"已删除旧录音: {file.FullName}");
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, $"删除旧录音失败: {file.FullName}, 错误: {ex.Message}");
            }
        }

        _logger.LogInformation($"清理旧录音完成,删除了 {deletedCount} 个文件");
    }
    catch (Exception ex)
    {
        // Hand the exception object to the logger so its type and stack trace are kept.
        _logger.LogError(ex, $"清理旧录音失败: {ex.Message}");
    }
}
/// <summary>
/// Raises <see cref="AudioSavedToFile"/> with the given file path.
/// </summary>
/// <param name="filePath">Path of the saved audio file.</param>
protected virtual void OnAudioSavedToFile(string filePath)
{
    // Snapshot the delegate so the null check and the call see the same subscribers.
    var handler = AudioSavedToFile;
    if (handler != null)
    {
        handler(this, filePath);
    }
}
/// <summary>
/// Raises <see cref="SpeechToTextResultReceived"/> with the given result.
/// </summary>
/// <param name="result">The speech-to-text result to publish.</param>
protected virtual void OnSpeechToTextResultReceived(SpeechToTextResult result)
{
    // Snapshot the delegate so the null check and the call see the same subscribers.
    var handler = SpeechToTextResultReceived;
    if (handler != null)
    {
        handler(this, result);
    }
}
/// <summary>
/// Releases the service's resources by ending every active recording.
/// Calling it again after the first call does nothing.
/// </summary>
public void Dispose()
{
    if (!_isDisposed)
    {
        _logger.LogInformation("释放音频处理服务资源");

        // Work on a snapshot of the keys: ending a stream removes its entry.
        var activeClientIds = _activeRecordings.Keys.ToArray();
        for (var index = 0; index < activeClientIds.Length; index++)
        {
            var activeClientId = activeClientIds[index];
            try
            {
                EndAudioStreamAsync(activeClientId).Wait();
            }
            catch (Exception ex)
            {
                _logger.LogError($"结束活动录音失败: {activeClientId}, 错误: {ex.Message}");
            }
        }

        _isDisposed = true;
    }
}
}
/// <summary>
/// Custom noise gate. Tracks the signal level with separate attack and release rates
/// and attenuates samples while the gate is closed. The gate opens when the level
/// reaches the threshold and closes again once it falls below half the threshold.
/// </summary>
public class NoiseGateSampleProvider : ISampleProvider
{
    // Gain applied while the gate is closed: the signal is reduced to 5%, not muted.
    private const float ClosedGain = 0.05f;

    // Fraction of the threshold below which an open gate closes (hysteresis).
    private const float CloseRatio = 0.5f;

    private readonly ISampleProvider source;
    private float threshold = 0.02f;
    private float attackSeconds = 0.01f;
    private float releaseSeconds = 0.1f;
    private float currentLevel = 0;
    private float attackRate;
    private float releaseRate;
    private bool open = false;

    /// <summary>
    /// Creates a noise gate that reads from <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Sample provider to gate; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public NoiseGateSampleProvider(ISampleProvider source)
    {
        this.source = source ?? throw new ArgumentNullException(nameof(source));
        CalculateAttackAndReleaseRates();
    }

    /// <summary>Format of the audio, identical to the source's format.</summary>
    public WaveFormat WaveFormat => source.WaveFormat;

    /// <summary>Level at or above which the gate opens.</summary>
    public float Threshold
    {
        get => threshold;
        set
        {
            threshold = value;
        }
    }

    /// <summary>Attack time in seconds; governs how quickly the tracked level rises.</summary>
    public float AttackSeconds
    {
        get => attackSeconds;
        set
        {
            attackSeconds = value;
            CalculateAttackAndReleaseRates();
        }
    }

    /// <summary>Release time in seconds; governs how quickly the tracked level falls.</summary>
    public float ReleaseSeconds
    {
        get => releaseSeconds;
        set
        {
            releaseSeconds = value;
            CalculateAttackAndReleaseRates();
        }
    }

    // Recomputes the per-sample rates from the current times and the sample rate.
    private void CalculateAttackAndReleaseRates()
    {
        attackRate = RateFor(WaveFormat.SampleRate, attackSeconds);
        releaseRate = RateFor(WaveFormat.SampleRate, releaseSeconds);
    }

    // Converts a time constant into a per-sample step in (0, 1]. Times shorter than
    // one sample (including zero, negative and NaN) mean "follow immediately"; without
    // this clamp they produced infinite, negative or >1 rates that corrupt the level.
    private static float RateFor(int sampleRate, float seconds)
    {
        float samples = sampleRate * seconds;
        if (!(samples > 1.0f))
        {
            return 1.0f;
        }

        return 1.0f / samples;
    }

    /// <summary>
    /// Reads samples from the source and attenuates those that fall while the gate is closed.
    /// </summary>
    /// <param name="buffer">Buffer that receives the samples.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="count">Maximum number of samples to read.</param>
    /// <returns>The number of samples read from the source.</returns>
    public int Read(float[] buffer, int offset, int count)
    {
        int samplesRead = source.Read(buffer, offset, count);
        for (int i = 0; i < samplesRead; i++)
        {
            float currentSample = Math.Abs(buffer[offset + i]);

            // Move the tracked level towards the current sample magnitude.
            if (currentSample > currentLevel)
            {
                // Attack: the level rises at the attack rate.
                currentLevel += attackRate * (currentSample - currentLevel);
            }
            else
            {
                // Release: the level falls at the release rate.
                currentLevel -= releaseRate * (currentLevel - currentSample);
                if (currentLevel < 0)
                {
                    currentLevel = 0;
                }
            }

            // Open at the threshold, close only below half of it, so the gate does
            // not chatter when the level hovers around the threshold.
            if (currentLevel >= threshold)
            {
                open = true;
            }
            else if (open && currentLevel < threshold * CloseRatio)
            {
                open = false;
            }

            // Attenuate (do not mute) while the gate is closed.
            if (!open)
            {
                buffer[offset + i] *= ClosedGain;
            }
        }

        return samplesRead;
    }
}
/// <summary>
/// Thin wrapper that runs every sample read from a source through a
/// <see cref="BiQuadFilter"/>. Without a filter the samples pass through unchanged.
/// </summary>
public class BiQuadFilterSampleProvider : ISampleProvider
{
    private readonly ISampleProvider source;

    /// <summary>
    /// Creates the wrapper around <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Sample provider whose output is filtered.</param>
    public BiQuadFilterSampleProvider(ISampleProvider source)
    {
        this.source = source;
        WaveFormat = source.WaveFormat;
    }

    /// <summary>Filter applied to each sample; <c>null</c> disables filtering.</summary>
    public BiQuadFilter Filter { get; set; }

    /// <summary>Format of the audio, taken from the source at construction.</summary>
    public WaveFormat WaveFormat { get; }

    /// <summary>
    /// Reads samples from the source and filters them in place.
    /// </summary>
    /// <param name="buffer">Buffer that receives the samples.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="count">Maximum number of samples to read.</param>
    /// <returns>The number of samples read from the source.</returns>
    public int Read(float[] buffer, int offset, int count)
    {
        int samplesRead = source.Read(buffer, offset, count);

        var activeFilter = Filter;
        if (activeFilter == null)
        {
            return samplesRead;
        }

        int end = offset + samplesRead;
        for (int position = offset; position < end; position++)
        {
            buffer[position] = activeFilter.Transform(buffer[position]);
        }

        return samplesRead;
    }
}
/// <summary>
/// Noise gate with an envelope follower, a hold period and a soft gain curve.
/// While the gate is open samples pass unchanged; while it is closed they are scaled
/// by the cube of the envelope's position relative to the threshold.
/// </summary>
public class ImprovedNoiseGate : ISampleProvider
{
    private readonly ISampleProvider source;
    private float threshold;
    private float attackSeconds;
    private float releaseSeconds;
    private float holdSeconds;
    private float envelope;
    private bool gateOpen;
    private int holdCountRemaining;

    /// <summary>
    /// Creates the gate around <paramref name="source"/> with default settings.
    /// </summary>
    /// <param name="source">Sample provider to gate; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public ImprovedNoiseGate(ISampleProvider source)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        this.source = source;
        WaveFormat = source.WaveFormat;

        // Defaults; assigned through the properties so they are range-checked.
        Threshold = 0.015f;
        AttackSeconds = 0.05f;
        ReleaseSeconds = 0.3f;
        HoldSeconds = 0.2f;
    }

    /// <summary>Format of the audio, taken from the source at construction.</summary>
    public WaveFormat WaveFormat { get; }

    /// <summary>
    /// Gate threshold, limited to the range 0.0-1.0.
    /// </summary>
    public float Threshold
    {
        get { return threshold; }
        set { threshold = Math.Max(0.0f, Math.Min(1.0f, value)); }
    }

    /// <summary>
    /// Attack time in seconds (at least 0.001).
    /// </summary>
    public float AttackSeconds
    {
        get { return attackSeconds; }
        set { attackSeconds = Math.Max(0.001f, value); }
    }

    /// <summary>
    /// Release time in seconds (at least 0.001).
    /// </summary>
    public float ReleaseSeconds
    {
        get { return releaseSeconds; }
        set { releaseSeconds = Math.Max(0.001f, value); }
    }

    /// <summary>
    /// Hold time in seconds (never negative): how long the gate stays open after the
    /// envelope has dropped to or below the threshold.
    /// </summary>
    public float HoldSeconds
    {
        get { return holdSeconds; }
        set { holdSeconds = Math.Max(0.0f, value); }
    }

    /// <summary>
    /// Current envelope value (read-only).
    /// </summary>
    public float CurrentEnvelope
    {
        get { return envelope; }
    }

    /// <summary>
    /// Current gate state (read-only).
    /// </summary>
    public bool IsGateOpen
    {
        get { return gateOpen; }
    }

    /// <summary>
    /// Reads samples from the source and applies the gate to them in place.
    /// </summary>
    /// <param name="buffer">Buffer that receives the samples.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="count">Maximum number of samples to read.</param>
    /// <returns>The number of samples read from the source.</returns>
    public int Read(float[] buffer, int offset, int count)
    {
        int samplesRead = source.Read(buffer, offset, count);

        // Per-call constants derived from the current settings.
        float attackCoeff = CalculateCoefficient(AttackSeconds);
        float releaseCoeff = CalculateCoefficient(ReleaseSeconds);
        int holdSamples = (int)(WaveFormat.SampleRate * HoldSeconds);

        int end = offset + samplesRead;
        for (int position = offset; position < end; position++)
        {
            float input = buffer[position];
            float magnitude = Math.Abs(input);

            // Envelope follower: attack coefficient while rising, release otherwise.
            float smoothing = magnitude > envelope ? attackCoeff : releaseCoeff;
            envelope = magnitude + (envelope - magnitude) * smoothing;

            // Gate state: open above the threshold (re-arming the hold counter),
            // keep the current state while the hold counter runs, then close.
            if (envelope > Threshold)
            {
                gateOpen = true;
                holdCountRemaining = holdSamples;
            }
            else if (holdCountRemaining > 0)
            {
                holdCountRemaining--;
            }
            else
            {
                gateOpen = false;
            }

            // Unity gain while open, soft gain curve while closed.
            float gain = gateOpen ? 1.0f : CalculateSoftGain(envelope);
            buffer[position] = input * gain;
        }

        return samplesRead;
    }

    // Smoothing coefficient exp(-1 / (sampleRate * time)); 0 for non-positive times.
    private float CalculateCoefficient(float timeInSeconds)
    {
        if (timeInSeconds <= 0.0f)
        {
            return 0.0f;
        }

        double timeInSamples = WaveFormat.SampleRate * timeInSeconds;
        return (float)Math.Exp(-1.0 / timeInSamples);
    }

    // Gain for a closed gate: 1 at or above the threshold, otherwise the cube of the
    // envelope's position relative to the threshold.
    private float CalculateSoftGain(float env)
    {
        if (env >= Threshold)
        {
            return 1.0f;
        }

        float relativePosition = env / Threshold;
        return relativePosition * relativePosition * relativePosition;
    }

    /// <summary>
    /// Resets the gate state: envelope to zero, gate closed, hold counter cleared.
    /// </summary>
    public void Reset()
    {
        envelope = 0.0f;
        gateOpen = false;
        holdCountRemaining = 0;
    }
}
/// <summary>
/// Smoothing processor: replaces every sample with the simple moving average of the
/// most recent <c>windowSize</c> samples read from the source.
/// </summary>
public class SmoothingSampleProvider : ISampleProvider
{
    private readonly ISampleProvider source;

    // Ring buffer holding the most recent input samples.
    private readonly float[] history;

    // Slot in the ring buffer that the next input sample overwrites.
    private int historyIndex;

    /// <summary>
    /// Creates the smoother around <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Sample provider to smooth; must not be null.</param>
    /// <param name="windowSize">Number of samples averaged; must be at least 1.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="windowSize"/> is less than 1.</exception>
    public SmoothingSampleProvider(ISampleProvider source, int windowSize = 5)
    {
        this.source = source ?? throw new ArgumentNullException(nameof(source));

        // Fail at construction: an empty window used to surface later as a division
        // by zero inside Read, far away from the faulty call.
        if (windowSize < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(windowSize), windowSize, "Window size must be at least 1.");
        }

        this.history = new float[windowSize];
        this.WaveFormat = source.WaveFormat;
    }

    /// <summary>Format of the audio, taken from the source at construction.</summary>
    public WaveFormat WaveFormat { get; }

    /// <summary>
    /// Reads samples from the source and smooths them in place.
    /// </summary>
    /// <param name="buffer">Buffer that receives the samples.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="count">Maximum number of samples to read.</param>
    /// <returns>The number of samples read from the source.</returns>
    public int Read(float[] buffer, int offset, int count)
    {
        int samplesRead = source.Read(buffer, offset, count);
        for (int n = 0; n < samplesRead; n++)
        {
            // Store the newest sample, overwriting the oldest one.
            history[historyIndex] = buffer[offset + n];
            historyIndex = (historyIndex + 1) % history.Length;

            // Simple moving average over the whole window. Slots not yet written
            // still hold 0, so the first outputs are averaged against zeros.
            float sum = 0;
            for (int i = 0; i < history.Length; i++)
            {
                sum += history[i];
            }

            buffer[offset + n] = sum / history.Length;
        }

        return samplesRead;
    }
}