ShengShengBuXi/ShengShengBuXi/Services/SpeechToTextService.cs
2025-03-28 04:34:33 +08:00

1453 lines
60 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Concurrent;
using System.IO;
using System.Net.WebSockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json;
using ShengShengBuXi.Models;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Web;
namespace ShengShengBuXi.Services
{
/// <summary>
/// State tracked for a single speech-recognition session.
/// </summary>
public class SpeechRecognitionSession
{
/// <summary>
/// Session identifier.
/// </summary>
public string SessionId { get; set; }
/// <summary>
/// Whether the session exists.
/// </summary>
public bool Exists { get; set; }
/// <summary>
/// Whether the session is active (its WebSocket connection is open).
/// </summary>
public bool IsActive { get; set; }
/// <summary>
/// Whether the recognized text has already been saved.
/// </summary>
public bool HasSavedText { get; set; }
/// <summary>
/// Whether there is text still waiting to be saved.
/// </summary>
public bool HasPendingText { get; set; }
/// <summary>
/// The text that is waiting to be saved.
/// </summary>
public string PendingText { get; set; }
/// <summary>
/// Final recognized text.
/// </summary>
public string FinalText { get; set; }
/// <summary>
/// Path of the recording file.
/// </summary>
public string RecordingPath { get; set; }
/// <summary>
/// Path of the file that holds the recognized text.
/// </summary>
public string TextFilePath { get; set; }
/// <summary>
/// Whether the result has been sent to the big-screen display queue.
/// </summary>
public bool IsSentToDisplay { get; set; }
/// <summary>
/// Sample rate; defaults to 16000.
/// </summary>
public int SampleRate { get; set; } = 16000;
/// <summary>
/// Language code; defaults to "zh-CN".
/// </summary>
public string Language { get; set; } = "zh-CN";
/// <summary>
/// List of recognized text entries.
/// </summary>
public List<string> RecognizedTexts { get; set; } = new List<string>();
/// <summary>
/// Temporary (in-progress) recognized text.
/// </summary>
public string VoiceTextStr { get; set; }
/// <summary>
/// Time the session was created; initialized from the local clock.
/// </summary>
public DateTime CreatedTime { get; set; } = DateTime.Now;
/// <summary>
/// Time of the last activity; initialized from the local clock.
/// </summary>
public DateTime LastActivityTime { get; set; } = DateTime.Now;
}
/// <summary>
/// 语音识别服务实现 - 腾讯云实时语音识别
/// </summary>
public class SpeechToTextService : ISpeechToTextService
{
private readonly ILogger<SpeechToTextService> _logger;
private readonly IConfigurationService _configService;
// WebSocket client for each session, keyed by session ID.
// NOTE(review): this dictionary is static (shared by every service instance) while the
// two dictionaries below are per-instance, and Dispose() clears it — confirm this is intended.
private static ConcurrentDictionary<string, ClientWebSocket> _webSockets = new ConcurrentDictionary<string, ClientWebSocket>();
// Cancellation token source for each session, keyed by session ID.
private readonly ConcurrentDictionary<string, CancellationTokenSource> _cancellationTokens = new ConcurrentDictionary<string, CancellationTokenSource>();
// Session state, keyed by session ID.
private readonly ConcurrentDictionary<string, SpeechRecognitionSession> _sessions = new ConcurrentDictionary<string, SpeechRecognitionSession>();
// Set by Dispose() so that a second call returns immediately.
private bool _isDisposed;
/// <summary>
/// Raised when a new speech-to-text result is available.
/// </summary>
public event EventHandler<SpeechToTextResult> ResultReceived;
/// <summary>
/// Creates the speech recognition service.
/// </summary>
/// <param name="logger">Logger used by the service; must not be null.</param>
/// <param name="configService">Configuration service the settings are read from; must not be null.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public SpeechToTextService(ILogger<SpeechToTextService> logger, IConfigurationService configService)
{
    if (logger == null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    if (configService == null)
    {
        throw new ArgumentNullException(nameof(configService));
    }

    _logger = logger;
    _configService = configService;
}
/// <summary>
/// Checks that the Tencent Cloud ASR configuration (AppId, SecretId, SecretKey) is present.
/// </summary>
/// <returns>
/// <c>true</c> when all three values are non-empty; <c>false</c> when any is missing
/// or when reading the configuration throws.
/// </returns>
public bool Initialize()
{
    try
    {
        _logger.LogInformation("腾讯云语音识别服务初始化");
        var config = _configService.CurrentConfig?.Network?.TencentCloudASR;

        // Only report whether each credential is present; never log its value.
        string secretIdState = string.IsNullOrEmpty(config?.SecretId) ? "未设置" : "已设置";
        string secretKeyState = string.IsNullOrEmpty(config?.SecretKey) ? "未设置" : "已设置";

        if (config == null || string.IsNullOrEmpty(config.AppId) ||
            string.IsNullOrEmpty(config.SecretId) || string.IsNullOrEmpty(config.SecretKey))
        {
            _logger.LogWarning("腾讯云语音识别配置无效,请检查配置");
            _logger.LogInformation($"腾讯云配置 - AppId: '{config?.AppId ?? "null"}', SecretId: '{secretIdState}', SecretKey: '{secretKeyState}'");
            return false;
        }

        _logger.LogInformation($"腾讯云配置 - AppId: '{config.AppId}', SecretId: '{secretIdState}', SecretKey: '{secretKeyState}'");
        return true;
    }
    catch (Exception ex)
    {
        // Pass the exception itself so the stack trace is kept in the log.
        _logger.LogError(ex, "语音识别服务初始化失败: {Message}", ex.Message);
        return false;
    }
}
/// <summary>
/// Starts a new recognition session: ends any session with the same ID, registers fresh
/// session state, connects a WebSocket to Tencent Cloud and starts the background receive loop.
/// </summary>
/// <param name="sessionId">Session ID; must not be null or empty.</param>
/// <param name="sampleRate">Sample rate stored on the session.</param>
/// <param name="language">Language code stored on the session.</param>
/// <param name="token">Cancellation token; linked into the session's own token.</param>
/// <returns>A task that completes once the connection is open and the receive loop has started.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sessionId"/> is null or empty.</exception>
/// <exception cref="InvalidOperationException">The service configuration is invalid.</exception>
public async Task StartSessionAsync(string sessionId, int sampleRate = 16000, string language = "zh-CN", CancellationToken token = default)
{
    if (string.IsNullOrEmpty(sessionId))
    {
        throw new ArgumentNullException(nameof(sessionId));
    }

    // End a previous session with the same ID, if any.
    // NOTE(review): the session object is replaced below, so a RecordingPath set earlier
    // through SetSessionRecordingPath is discarded — confirm callers set it after starting.
    await EndSessionAsync(sessionId, token);

    CancellationTokenSource linkedCts = null;
    bool receiveLoopStarted = false;
    try
    {
        if (!Initialize())
        {
            _logger.LogError("腾讯云语音识别服务初始化失败,请检查配置");
            throw new InvalidOperationException("语音识别服务未正确初始化,无法启动会话");
        }

        var session = new SpeechRecognitionSession
        {
            SessionId = sessionId,
            SampleRate = sampleRate,
            Language = language,
            CreatedTime = DateTime.Now,
            LastActivityTime = DateTime.Now,
            IsActive = true,
            Exists = true
        };
        _sessions[sessionId] = session;

        // Per-session token source; EndSessionAsync cancels and disposes it.
        var cts = new CancellationTokenSource();
        _cancellationTokens[sessionId] = cts;

        // Combines the session token with the caller's token.
        linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, token);

        var webSocket = new ClientWebSocket();
        _webSockets[sessionId] = webSocket;
        try
        {
            string url = BuildTencentCloudASRUrl(sessionId);
            await webSocket.ConnectAsync(new Uri(url), linkedCts.Token);
            _logger.LogInformation($"腾讯云WebSocket连接成功: {sessionId}");
            _logger.LogInformation($"开始语音识别会话: {sessionId}, 采样率: {sampleRate}, 语言: {language}");

            // Receive results in the background. The linked source is owned by the receive
            // loop from here on and is disposed when that loop finishes, so its registration
            // on the caller's token does not leak.
            var ownedCts = linkedCts;
            _ = ReceiveResultsAsync(sessionId, webSocket, ownedCts.Token)
                .ContinueWith(
                    finished => ownedCts.Dispose(),
                    CancellationToken.None,
                    TaskContinuationOptions.ExecuteSynchronously,
                    TaskScheduler.Default);
            receiveLoopStarted = true;
        }
        catch (WebSocketException wsEx)
        {
            _logger.LogError($"WebSocket连接失败: {wsEx.Message}, WebSocketError: {wsEx.WebSocketErrorCode}");
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError($"连接腾讯云服务失败: {ex.Message}");
            throw;
        }
    }
    catch (Exception ex)
    {
        _logger.LogError($"开始语音识别会话失败: {ex.Message}");

        // The receive loop never took ownership, so release the linked source here.
        if (!receiveLoopStarted)
        {
            linkedCts?.Dispose();
        }

        await EndSessionAsync(sessionId, token);
        throw;
    }
}
/// <summary>
/// Ends a recognition session: saves the collected text, sends the end message, closes and
/// disposes the WebSocket, and cancels the session's token. The session entry itself is kept
/// (marked inactive) so that its state can still be queried.
/// </summary>
/// <param name="sessionId">Session ID; must not be null or empty.</param>
/// <param name="token">Cancellation token used for the send, delay and close operations.</param>
/// <returns>A task that completes when the cleanup is done.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sessionId"/> is null or empty.</exception>
public async Task EndSessionAsync(string sessionId, CancellationToken token = default)
{
    // True when the exception message indicates the remote side already dropped the connection.
    bool RemoteAlreadyClosed(WebSocketException error)
    {
        return error.Message.Contains("remote party closed")
            || error.Message.Contains("connection was aborted")
            || error.Message.Contains("connection has been terminated");
    }

    if (string.IsNullOrEmpty(sessionId))
    {
        throw new ArgumentNullException(nameof(sessionId));
    }

    _logger.LogInformation($"结束语音识别会话: {sessionId}");

    // Persist whatever text has been collected so far.
    if (_sessions.TryGetValue(sessionId, out var current)
        && (current.RecognizedTexts.Count > 0 || current.VoiceTextStr?.Length > 0))
    {
        // No completed sentence yet: fall back to the latest interim text.
        if (current.RecognizedTexts.Count == 0)
        {
            current.RecognizedTexts.Add(current.VoiceTextStr ?? "");
        }

        var distinctTexts = RemoveDuplicateTexts(current.RecognizedTexts);
        string joinedText = string.Join("\n", distinctTexts);
        _logger.LogInformation($"在结束会话前保存会话 {sessionId} 的所有已收集文本");
        SaveRecognitionText(sessionId, joinedText);

        current.IsActive = false;
        current.LastActivityTime = DateTime.Now;
        _sessions[sessionId] = current;
    }

    // Shut down the session's WebSocket, if it has one.
    if (_webSockets.TryRemove(sessionId, out var socket))
    {
        try
        {
            if (socket.State == WebSocketState.Open)
            {
                try
                {
                    byte[] endPayload = Encoding.UTF8.GetBytes("{\"type\": \"end\"}");
                    await socket.SendAsync(new ArraySegment<byte>(endPayload), WebSocketMessageType.Text, true, token);
                    // Give the server a moment to deliver its final result.
                    await Task.Delay(500, token);
                }
                catch (WebSocketException wsEx) when (RemoteAlreadyClosed(wsEx))
                {
                    // The server closed first; there is nothing to send.
                    _logger.LogDebug($"发送结束消息时发现WebSocket已关闭: {sessionId}");
                }
            }

            // Re-check the state: it may have changed during the send/delay above.
            if (socket.State == WebSocketState.Open)
            {
                try
                {
                    await socket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Session ended", token);
                }
                catch (WebSocketException wsEx) when (RemoteAlreadyClosed(wsEx))
                {
                    // The server closed first; no close frame is needed.
                    _logger.LogDebug($"关闭WebSocket时发现连接已关闭: {sessionId}");
                }
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning($"关闭WebSocket连接失败: {ex.Message}");
        }
        finally
        {
            // Release the socket whether or not the shutdown succeeded.
            socket.Dispose();
        }
    }

    // Cancel the background receive task.
    if (_cancellationTokens.TryRemove(sessionId, out var sessionCts))
    {
        try
        {
            sessionCts.Cancel();
            sessionCts.Dispose();
        }
        catch (Exception ex)
        {
            _logger.LogWarning($"取消任务失败: {ex.Message}");
        }
    }

    // Keep the session entry (its information may still be needed); just mark it inactive.
    if (_sessions.TryGetValue(sessionId, out var remaining))
    {
        remaining.IsActive = false;
        _sessions[sessionId] = remaining;
    }
}
/// <summary>
/// Sends one chunk of audio to the session's WebSocket. If the socket is not open, or the
/// send fails because the remote side closed it, the session is restarted once and the send
/// is attempted on the new socket.
/// </summary>
/// <param name="audioData">Audio bytes; null or empty input is logged and ignored.</param>
/// <param name="sessionId">Session ID; must not be null or empty.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>A task that completes when the data has been sent or the failure has been logged.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sessionId"/> is null or empty.</exception>
public async Task ProcessAudioAsync(byte[] audioData, string sessionId, CancellationToken token = default)
{
    // Sample rate and language of the known session, or the defaults when it is unknown.
    (int SampleRate, string Language) CurrentSettings()
    {
        return _sessions.TryGetValue(sessionId, out var known)
            ? (known.SampleRate, known.Language)
            : (16000, "zh-CN");
    }

    // True when the exception message indicates the remote side dropped the connection.
    bool RemoteAlreadyClosed(WebSocketException error)
    {
        return error.Message.Contains("remote party closed")
            || error.Message.Contains("connection was aborted")
            || error.Message.Contains("connection has been terminated");
    }

    if (audioData == null || audioData.Length == 0)
    {
        _logger.LogWarning("音频数据为空");
        return;
    }

    if (string.IsNullOrEmpty(sessionId))
    {
        _logger.LogWarning("会话Id为空");
        throw new ArgumentNullException(nameof(sessionId));
    }

    if (!_webSockets.TryGetValue(sessionId, out var socket))
    {
        _logger.LogWarning($"ProcessAudioAsync->会话不存在: {sessionId}");
        return;
    }

    // Record the activity on the session.
    if (_sessions.TryGetValue(sessionId, out var activeSession))
    {
        activeSession.LastActivityTime = DateTime.Now;
        _sessions[sessionId] = activeSession;
    }

    // Reconnect first when the socket is no longer open.
    if (socket.State != WebSocketState.Open)
    {
        _logger.LogWarning($"WebSocket连接已关闭尝试重新连接: {sessionId}");
        var settings = CurrentSettings();
        try
        {
            await StartSessionAsync(sessionId, settings.SampleRate, settings.Language, token);
            bool reconnected = _webSockets.TryGetValue(sessionId, out socket) && socket.State == WebSocketState.Open;
            if (!reconnected)
            {
                _logger.LogError($"重新连接失败,无法处理音频数据: {sessionId}");
                return;
            }
        }
        catch (Exception ex)
        {
            _logger.LogError($"重新连接失败: {ex.Message}");
            return;
        }
    }

    try
    {
        await socket.SendAsync(new ArraySegment<byte>(audioData), WebSocketMessageType.Binary, true, token);
    }
    catch (WebSocketException wsEx) when (RemoteAlreadyClosed(wsEx))
    {
        _logger.LogWarning($"发送音频数据时WebSocket已关闭尝试重新连接: {sessionId}");
        var settings = CurrentSettings();
        try
        {
            await StartSessionAsync(sessionId, settings.SampleRate, settings.Language, token);
            // Retry the same chunk on the fresh connection.
            if (_webSockets.TryGetValue(sessionId, out var freshSocket) && freshSocket.State == WebSocketState.Open)
            {
                await freshSocket.SendAsync(new ArraySegment<byte>(audioData), WebSocketMessageType.Binary, true, token);
            }
        }
        catch (Exception ex)
        {
            _logger.LogError($"重新连接并发送数据失败: {ex.Message}");
        }
    }
    catch (Exception ex)
    {
        _logger.LogError($"发送音频数据失败: {ex.Message}");
    }
}
/// <summary>
/// Runs a whole audio file through a temporary recognition session.
/// </summary>
/// <param name="audioFile">Path of the audio file.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>
/// A placeholder result carrying the temporary session ID; the actual recognition output is
/// delivered through the <see cref="ResultReceived"/> event.
/// </returns>
/// <exception cref="FileNotFoundException">The path is null/empty or the file does not exist.</exception>
public async Task<SpeechToTextResult> ProcessAudioFileAsync(string audioFile, CancellationToken token = default)
{
    bool fileMissing = string.IsNullOrEmpty(audioFile) || !File.Exists(audioFile);
    if (fileMissing)
    {
        throw new FileNotFoundException("音频文件不存在", audioFile);
    }

    // Temporary session dedicated to this file.
    string sessionId = Guid.NewGuid().ToString();
    try
    {
        await StartSessionAsync(sessionId, 16000, "zh-CN", token);

        // The bytes are sent as-is, in one piece; the file is assumed to already be in the
        // PCM format the service expects (no conversion is done here).
        byte[] fileBytes = await File.ReadAllBytesAsync(audioFile, token);
        await ProcessAudioAsync(fileBytes, sessionId, token);
        await EndSessionAsync(sessionId, token);

        var placeholder = new SpeechToTextResult
        {
            Id = sessionId,
            Text = "音频文件处理完成,结果将通过事件发送",
            IsFinal = true,
            RecordingFileName = Path.GetFileName(audioFile)
        };
        return placeholder;
    }
    catch (Exception ex)
    {
        _logger.LogError($"处理音频文件失败: {ex.Message}");
        // Make sure the temporary session is ended before propagating.
        await EndSessionAsync(sessionId, token);
        throw;
    }
}
/// <summary>
/// Feeds an audio stream through a temporary recognition session in chunks of up to 16000 bytes.
/// </summary>
/// <param name="audioStream">Stream to read the audio from; must not be null.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>
/// A placeholder result carrying the temporary session ID; the actual recognition output is
/// delivered through the <see cref="ResultReceived"/> event.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="audioStream"/> is null.</exception>
public async Task<SpeechToTextResult> ProcessAudioStreamAsync(Stream audioStream, CancellationToken token = default)
{
    if (audioStream == null)
    {
        throw new ArgumentNullException(nameof(audioStream));
    }

    // Temporary session dedicated to this stream.
    string sessionId = Guid.NewGuid().ToString();
    try
    {
        await StartSessionAsync(sessionId, 16000, "zh-CN", token);

        // 16000 bytes is about half a second of 16 kHz, 16-bit mono audio.
        byte[] readBuffer = new byte[16000];
        while (true)
        {
            int count = await audioStream.ReadAsync(readBuffer, 0, readBuffer.Length, token);
            if (count <= 0)
            {
                break;
            }

            // Send an exactly-sized copy so no stale bytes from the buffer tail go out.
            byte[] chunk = new byte[count];
            Buffer.BlockCopy(readBuffer, 0, chunk, 0, count);
            await ProcessAudioAsync(chunk, sessionId, token);

            // Pause between chunks to pace the upload.
            await Task.Delay(100, token);
        }

        await EndSessionAsync(sessionId, token);

        var placeholder = new SpeechToTextResult
        {
            Id = sessionId,
            Text = "音频流处理完成,结果将通过事件发送",
            IsFinal = true
        };
        return placeholder;
    }
    catch (Exception ex)
    {
        _logger.LogError($"处理音频流失败: {ex.Message}");
        // Make sure the temporary session is ended before propagating.
        await EndSessionAsync(sessionId, token);
        throw;
    }
}
/// <summary>
/// Stores the recording file path on a session, creating the session entry if it does not exist.
/// </summary>
/// <param name="sessionId">Session ID; the call is ignored when null or empty.</param>
/// <param name="recordingPath">Recording file path; the call is ignored when null or empty.</param>
public void SetSessionRecordingPath(string sessionId, string recordingPath)
{
    if (string.IsNullOrEmpty(sessionId) || string.IsNullOrEmpty(recordingPath))
    {
        return;
    }

    if (!_sessions.TryGetValue(sessionId, out var target))
    {
        // Unknown session: register a new entry that carries the path.
        target = new SpeechRecognitionSession
        {
            SessionId = sessionId,
            RecordingPath = recordingPath,
            Exists = true,
            CreatedTime = DateTime.Now,
            LastActivityTime = DateTime.Now
        };
    }
    else
    {
        target.RecordingPath = recordingPath;
    }

    _sessions[sessionId] = target;
    _logger.LogInformation($"设置会话 {sessionId} 的录音文件路径: {recordingPath}");
}
/// <summary>
/// Writes the recognized text to a .txt file next to the session's recording file.
/// When the recording path is not known yet, the text is parked on the session and a
/// background saver is started instead. Failures are logged, never thrown.
/// </summary>
/// <param name="sessionId">Session ID; nothing happens when null or empty.</param>
/// <param name="finalText">Text to save; nothing happens when null or empty.</param>
private void SaveRecognitionText(string sessionId, string finalText)
{
    if (string.IsNullOrEmpty(sessionId) || string.IsNullOrEmpty(finalText))
    {
        return;
    }

    try
    {
        // Look the session up, registering a fresh entry when it is unknown.
        if (!_sessions.TryGetValue(sessionId, out var state))
        {
            state = new SpeechRecognitionSession
            {
                SessionId = sessionId,
                Exists = true,
                CreatedTime = DateTime.Now,
                LastActivityTime = DateTime.Now
            };
            _sessions[sessionId] = state;
        }

        // Without a recording path there is nowhere to write: park the text and let the
        // background saver pick it up once the path is set.
        if (string.IsNullOrEmpty(state.RecordingPath))
        {
            _logger.LogWarning($"未找到会话 {sessionId} 的录音文件路径,无法保存文本");
            state.PendingText = finalText;
            state.HasPendingText = true;
            _sessions[sessionId] = state;
            StartPendingTextSaver(sessionId);
            return;
        }

        // Same name as the recording, with a .txt extension.
        string targetPath = Path.ChangeExtension(state.RecordingPath, ".txt");
        state.TextFilePath = targetPath;

        string folder = Path.GetDirectoryName(targetPath);
        if (!string.IsNullOrEmpty(folder) && !Directory.Exists(folder))
        {
            Directory.CreateDirectory(folder);
        }

        // Up to three attempts; an IOException (e.g. file in use) triggers a retry.
        const int maxAttempts = 3;
        bool written = false;
        for (int attempt = 1; attempt <= maxAttempts && !written; attempt++)
        {
            try
            {
                File.WriteAllText(targetPath, finalText);
                written = true;
                _logger.LogInformation($"保存识别文本到文件 {targetPath}");

                state.HasSavedText = true;
                state.HasPendingText = false;
                state.PendingText = null;
                state.FinalText = finalText;
                _sessions[sessionId] = state;
            }
            catch (IOException ioEx)
            {
                _logger.LogWarning($"保存文本失败 (尝试 {attempt}/{maxAttempts}): {ioEx.Message}");
                if (attempt < maxAttempts)
                {
                    // Wait a little longer after each failed attempt.
                    Thread.Sleep(500 * attempt);
                }
            }
        }

        if (written)
        {
            return;
        }

        // Every attempt failed: fall back to a timestamped backup file in the same folder.
        string backupName = $"backup_{Path.GetFileNameWithoutExtension(targetPath)}_{DateTime.Now:yyyyMMdd_HHmmss}.txt";
        string backupPath = Path.Combine(Path.GetDirectoryName(targetPath) ?? string.Empty, backupName);
        try
        {
            File.WriteAllText(backupPath, finalText);
            _logger.LogInformation($"已将识别文本保存到备用文件: {backupPath}");
        }
        catch (Exception backupEx)
        {
            _logger.LogError($"保存识别文本到备用文件失败: {backupEx.Message}");
        }
    }
    catch (Exception ex)
    {
        _logger.LogError($"保存识别文本失败: {ex.Message}");
    }
}
/// <summary>
/// Starts a fire-and-forget background task that polls the session every 2 seconds (15 times,
/// about 30 seconds) for a recording path and saves the pending text once the path is set.
/// If the path never arrives, the text is written to a "backup_texts" folder under the
/// application base directory. All failures are logged; nothing is thrown to the caller.
/// </summary>
/// <param name="sessionId">Session whose pending text should be saved.</param>
private void StartPendingTextSaver(string sessionId)
{
    // NOTE(review): each call starts a separate poller; several may run for the same session.
    Task.Run(async () =>
    {
        for (int i = 0; i < 15; i++)
        {
            // Stop when the session is gone or the text has been saved by someone else.
            if (!_sessions.TryGetValue(sessionId, out var session) || !session.HasPendingText)
            {
                return;
            }

            if (!string.IsNullOrEmpty(session.RecordingPath))
            {
                _logger.LogInformation($"发现会话 {sessionId} 的录音文件路径,准备保存之前缓存的文本");
                try
                {
                    // Path handling is inside the try so that an invalid path is logged
                    // instead of faulting this unobserved task silently.
                    string textFilePath = Path.ChangeExtension(session.RecordingPath, ".txt");
                    string directory = Path.GetDirectoryName(textFilePath);
                    if (!string.IsNullOrEmpty(directory) && !Directory.Exists(directory))
                    {
                        Directory.CreateDirectory(directory);
                    }

                    session.TextFilePath = textFilePath;
                    File.WriteAllText(textFilePath, session.PendingText);
                    _logger.LogInformation($"成功保存延迟的识别文本到文件 {textFilePath}");

                    session.HasSavedText = true;
                    session.HasPendingText = false;
                    session.PendingText = null;
                    _sessions[sessionId] = session;
                    return;
                }
                catch (Exception ex)
                {
                    _logger.LogError($"保存延迟识别文本失败: {ex.Message}");
                }
            }

            // Check again in 2 seconds.
            await Task.Delay(2000);
        }

        // Timed out with text still pending: write it to the backup location.
        if (_sessions.TryGetValue(sessionId, out var timeoutSession) && timeoutSession.HasPendingText)
        {
            try
            {
                // Directory creation is inside the try for the same reason as above.
                string backupDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "backup_texts");
                if (!Directory.Exists(backupDir))
                {
                    Directory.CreateDirectory(backupDir);
                }

                string backupPath = Path.Combine(backupDir, $"session_{sessionId}_{DateTime.Now:yyyyMMdd_HHmmss}.txt");
                timeoutSession.TextFilePath = backupPath;
                File.WriteAllText(backupPath, timeoutSession.PendingText);
                _logger.LogInformation($"已将超时未保存的识别文本保存到备用位置: {backupPath}");
            }
            catch (Exception ex)
            {
                _logger.LogError($"保存超时识别文本到备用位置失败: {ex.Message}");
            }
        }
    });
}
/// <summary>
/// Background receive loop for one session. Reads text messages from the WebSocket, parses
/// them as Tencent Cloud ASR results, raises <see cref="ResultReceived"/> for interim and
/// final results, and saves the accumulated text after each completed sentence and when the
/// loop ends for any reason.
/// </summary>
/// <param name="sessionId">Session ID.</param>
/// <param name="webSocket">Connected WebSocket client for the session.</param>
/// <param name="token">Cancellation token that stops the loop.</param>
/// <returns>A task that completes when the loop has ended; exceptions are logged, not thrown.</returns>
private async Task ReceiveResultsAsync(string sessionId, ClientWebSocket webSocket, CancellationToken token)
{
    byte[] buffer = new byte[8192];
    // Raw bytes of the message being assembled. Decoding happens only once the message is
    // complete, so a multi-byte UTF-8 character split across two frames is not corrupted.
    using var messageBytes = new MemoryStream();
    try
    {
        // Make sure a session entry exists.
        if (!_sessions.TryGetValue(sessionId, out var session))
        {
            session = new SpeechRecognitionSession
            {
                SessionId = sessionId,
                Exists = true,
                IsActive = true,
                CreatedTime = DateTime.Now,
                LastActivityTime = DateTime.Now
            };
            _sessions[sessionId] = session;
        }

        while (webSocket.State == WebSocketState.Open && !token.IsCancellationRequested)
        {
            WebSocketReceiveResult result = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), token);
            if (result.MessageType == WebSocketMessageType.Close)
            {
                await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Response complete", token);
                break;
            }

            if (result.MessageType != WebSocketMessageType.Text)
            {
                continue;
            }

            messageBytes.Write(buffer, 0, result.Count);
            if (!result.EndOfMessage)
            {
                continue;
            }

            string fullMessage = Encoding.UTF8.GetString(messageBytes.GetBuffer(), 0, (int)messageBytes.Length);
            messageBytes.SetLength(0);
            _logger.LogDebug($"收到识别结果: {fullMessage}");
            try
            {
                var tencentResult = JsonConvert.DeserializeObject<TencentCloudASRResult>(fullMessage);
                if (tencentResult != null && tencentResult.Code == 0)
                {
                    if (tencentResult.Result != null && !string.IsNullOrEmpty(tencentResult.Result.VoiceTextStr))
                    {
                        string recognizedText = tencentResult.Result.VoiceTextStr;

                        // Re-read the session: it may have been replaced or removed meanwhile.
                        if (_sessions.TryGetValue(sessionId, out session))
                        {
                            session.LastActivityTime = DateTime.Now;
                            if (tencentResult.Result.SliceType == 0 || tencentResult.Result.SliceType == 1)
                            {
                                // Interim result: shown to the UI only, not added to the saved list.
                                var tempResult = new SpeechToTextResult
                                {
                                    Id = tencentResult.MessageId,
                                    Text = recognizedText,
                                    IsFinal = false,
                                    Confidence = 1.0f,
                                    Language = session.Language,
                                    StartTimeMs = tencentResult.Result.StartTime,
                                    EndTimeMs = tencentResult.Result.EndTime,
                                    CreatedAt = DateTime.Now,
                                    IsForDisplayOnly = true
                                };
                                session.VoiceTextStr = recognizedText;
                                OnResultReceived(tempResult);
                            }
                            else if (tencentResult.Result.SliceType == 2)
                            {
                                // Sentence finished: only now does it go into the saved list.
                                bool shouldAdd = true;
                                if (session.RecognizedTexts.Count > 0)
                                {
                                    // Above 70% similarity to the previous sentence, treat it as
                                    // a revision and replace instead of appending.
                                    string lastText = session.RecognizedTexts.Last();
                                    double similarity = CalculateTextSimilarity(lastText, recognizedText);
                                    if (similarity > 0.7)
                                    {
                                        _logger.LogInformation($"检测到相似文本(相似度:{similarity:P2}),更新而不是添加");
                                        session.RecognizedTexts[session.RecognizedTexts.Count - 1] = recognizedText;
                                        shouldAdd = false;
                                    }
                                }

                                if (shouldAdd)
                                {
                                    _logger.LogInformation($"句子完成,添加到保存列表: '{recognizedText}'");
                                    session.RecognizedTexts.Add(recognizedText);
                                }

                                _sessions[sessionId] = session;
                                // Save everything collected so far after every finished sentence.
                                SaveLatestRecognitionText(sessionId, session.RecognizedTexts);
                            }
                        }

                        // Raise the full event only for final results or finished sentences.
                        if (tencentResult.Final == 1 || tencentResult.Result.SliceType == 2)
                        {
                            // The lookup above may have failed and left session null, so it
                            // must not be dereferenced unconditionally here.
                            string recordingPath = session?.RecordingPath;
                            var speechResult = new SpeechToTextResult
                            {
                                Id = tencentResult.MessageId,
                                Text = recognizedText,
                                IsFinal = true,
                                Confidence = 1.0f, // hard-coded; no confidence value is read from the response
                                Language = session?.Language,
                                StartTimeMs = tencentResult.Result.StartTime,
                                EndTimeMs = tencentResult.Result.EndTime,
                                CreatedAt = DateTime.Now,
                                RecordingFilePath = recordingPath,
                                RecordingFileName = !string.IsNullOrEmpty(recordingPath) ? Path.GetFileName(recordingPath) : null
                            };
                            OnResultReceived(speechResult);
                        }
                    }

                    // Final result: persist the text and mark the session inactive.
                    if (tencentResult.Final == 1)
                    {
                        _logger.LogInformation($"识别完成,结束会话: {sessionId}");
                        SaveCollectedTextsAndDeactivate(sessionId);
                    }
                }
                else if (tencentResult != null && tencentResult.Code != 0)
                {
                    _logger.LogWarning($"识别失败: {tencentResult.Code}, {tencentResult.Message}");
                }
            }
            catch (JsonException ex)
            {
                _logger.LogError($"解析识别结果失败: {ex.Message}, 原始消息: {fullMessage}");
            }
        }
    }
    catch (WebSocketException wsEx) when (wsEx.WebSocketErrorCode == WebSocketError.ConnectionClosedPrematurely ||
        wsEx.Message.Contains("remote party closed") ||
        wsEx.Message.Contains("connection was aborted") ||
        wsEx.Message.Contains("connection has been terminated") ||
        wsEx.Message.Contains("without completing the close handshake"))
    {
        // The server dropped the connection without a close handshake; treated as a normal end.
        // The error code is checked as well because the message text may be localized.
        _logger.LogInformation($"腾讯云服务器已关闭连接,识别会话{sessionId}结束");
        SaveCollectedTextsAndDeactivate(sessionId);
    }
    catch (OperationCanceledException)
    {
        _logger.LogInformation($"接收任务已取消: {sessionId}");
        SaveCollectedTextsAndDeactivate(sessionId);
    }
    catch (Exception ex)
    {
        _logger.LogError($"接收识别结果异常: {ex.Message}");
        SaveCollectedTextsAndDeactivate(sessionId);
    }
    finally
    {
        if (webSocket.State == WebSocketState.Open)
        {
            try
            {
                // Use a fresh token: the session token may already be cancelled.
                await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Receiving complete", CancellationToken.None);
            }
            catch (Exception ex)
            {
                _logger.LogDebug($"关闭WebSocket连接时发生异常: {ex.Message}");
            }
        }
    }
}

/// <summary>
/// Saves the de-duplicated recognized text of a session and marks the session inactive.
/// Does nothing when the session is unknown or has no recognized text.
/// </summary>
/// <param name="sessionId">Session ID.</param>
private void SaveCollectedTextsAndDeactivate(string sessionId)
{
    if (_sessions.TryGetValue(sessionId, out var session) && session.RecognizedTexts.Count > 0)
    {
        var uniqueTexts = RemoveDuplicateTexts(session.RecognizedTexts);
        string finalText = string.Join("\n", uniqueTexts);
        SaveRecognitionText(sessionId, finalText);
        session.IsActive = false;
        _sessions[sessionId] = session;
    }
}
/// <summary>
/// Computes how similar two texts are, as the length of their longest common substring
/// divided by the length of the longer text.
/// </summary>
/// <param name="text1">First text.</param>
/// <param name="text2">Second text.</param>
/// <returns>A value from 0 to 1; 0 when either text is null or empty, 1 when they are equal.</returns>
private double CalculateTextSimilarity(string text1, string text2)
{
    if (string.IsNullOrEmpty(text1) || string.IsNullOrEmpty(text2))
    {
        return 0;
    }

    if (text1 == text2)
    {
        return 1.0;
    }

    // Longest-common-substring table, kept as two rolling rows: cell [col] holds the length
    // of the common run that ends at the current character of text1 and text2[col - 1].
    int[] previousRow = new int[text2.Length + 1];
    int[] currentRow = new int[text2.Length + 1];
    int longestRun = 0;

    foreach (char left in text1)
    {
        for (int col = 1; col <= text2.Length; col++)
        {
            if (left == text2[col - 1])
            {
                int run = previousRow[col - 1] + 1;
                currentRow[col] = run;
                if (run > longestRun)
                {
                    longestRun = run;
                }
            }
            else
            {
                // A mismatch breaks the run.
                currentRow[col] = 0;
            }
        }

        int[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }

    return (double)longestRun / Math.Max(text1.Length, text2.Length);
}
/// <summary>
/// Returns a new list with near-duplicate texts removed. A text is dropped when its similarity
/// to any text already kept exceeds 70%; the first occurrence wins and order is preserved.
/// </summary>
/// <param name="texts">Texts to filter; may be null.</param>
/// <returns>A new list with the kept texts (empty when <paramref name="texts"/> is null).</returns>
private List<string> RemoveDuplicateTexts(List<string> texts)
{
    var kept = new List<string>();
    if (texts == null)
    {
        return kept;
    }

    for (int index = 0; index < texts.Count; index++)
    {
        string candidate = texts[index];
        bool alreadyCovered = false;

        // Compare against everything kept so far (nothing yet for the first entry,
        // so the first text is always kept).
        for (int k = 0; k < kept.Count && !alreadyCovered; k++)
        {
            double score = CalculateTextSimilarity(kept[k], candidate);
            if (score > 0.7)
            {
                alreadyCovered = true;
                _logger.LogInformation($"移除重复文本,相似度: {score:P2}");
            }
        }

        if (!alreadyCovered)
        {
            kept.Add(candidate);
        }
    }

    return kept;
}
/// <summary>
/// Builds the signed WebSocket URL for the Tencent Cloud real-time speech recognition service.
/// </summary>
/// <param name="sessionId">Session ID; used as the voice_id and to look up the sample rate.</param>
/// <returns>The wss:// URL including the URL-encoded HMAC-SHA1 signature.</returns>
/// <exception cref="InvalidOperationException">AppId, SecretId or SecretKey is empty.</exception>
private string BuildTencentCloudASRUrl(string sessionId)
{
    try
    {
        var config = _configService.CurrentConfig.Network.TencentCloudASR;

        // Sample rate of the session, 16000 when the session is unknown.
        int sampleRate = 16000;
        if (_sessions.TryGetValue(sessionId, out var session))
        {
            sampleRate = session.SampleRate;
        }

        if (string.IsNullOrEmpty(config.AppId) ||
            string.IsNullOrEmpty(config.SecretId) ||
            string.IsNullOrEmpty(config.SecretKey))
        {
            // Only report whether each credential is present; never log its value.
            string secretIdState = string.IsNullOrEmpty(config.SecretId) ? "未设置" : "已设置";
            string secretKeyState = string.IsNullOrEmpty(config.SecretKey) ? "未设置" : "已设置";
            _logger.LogError("腾讯云API参数缺失AppId, SecretId 或 SecretKey 为空");
            _logger.LogError($"当前配置 - AppId: '{config.AppId}', SecretId: '{secretIdState}', SecretKey: '{secretKeyState}'");
            throw new InvalidOperationException("腾讯云API参数不能为空");
        }

        // The nonce is the current Unix time in seconds (same value as the timestamp).
        var nonce = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();
        var timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();
        // The signature expires 24 hours from now.
        var expired = (DateTimeOffset.UtcNow.ToUnixTimeSeconds() + 86400).ToString();
        var voiceId = sessionId;

        // Engine model follows the sample rate unless the configuration overrides it.
        var engineModelType = sampleRate == 8000 ? "8k_zh" : "16k_zh";
        if (!string.IsNullOrEmpty(config.EngineModelType))
        {
            engineModelType = config.EngineModelType;
        }

        var parameters = new Dictionary<string, string>
        {
            { "secretid", config.SecretId },
            { "timestamp", timestamp },
            { "expired", expired },
            { "nonce", nonce },
            { "voice_id", voiceId },
            { "voice_format", config.VoiceFormat },
            { "engine_model_type", engineModelType },
            { "needvad", config.NeedVad ? "1" : "0" }
        };

        // Optional filters.
        if (config.FilterDirty)
            parameters.Add("filter_dirty", "1");
        if (config.FilterModal)
            parameters.Add("filter_modal", "1");
        if (config.FilterPunc)
            parameters.Add("filter_punc", "1");

        // Sort once with an ordinal comparer so the order never depends on the current culture.
        var sortedParameters = parameters.OrderBy(p => p.Key, StringComparer.Ordinal).ToList();

        // String to sign: host + path + sorted, un-encoded query parameters.
        var signatureOrigin = "asr.cloud.tencent.com/asr/v2/" + config.AppId + "?"
            + string.Join("&", sortedParameters.Select(p => $"{p.Key}={p.Value}"));

        using var hmac = new HMACSHA1(Encoding.UTF8.GetBytes(config.SecretKey));
        var signatureBytes = hmac.ComputeHash(Encoding.UTF8.GetBytes(signatureOrigin));
        var signature = Convert.ToBase64String(signatureBytes);
        var encodedSignature = HttpUtility.UrlEncode(signature);

        // Final URL: the AppId is part of the path; parameter values are URL-encoded here.
        var unsignedUrl = $"wss://asr.cloud.tencent.com/asr/v2/{config.AppId}?"
            + string.Join("&", sortedParameters.Select(p => $"{p.Key}={HttpUtility.UrlEncode(p.Value)}"));
        var url = unsignedUrl + $"&signature={encodedSignature}";

        // The signature (and therefore the full signed URL) grants access until it expires,
        // so it is kept out of the logs; the string to sign is logged at Debug level only.
        _logger.LogDebug($"生成的签名原始字符串: {signatureOrigin}");
        _logger.LogInformation($"正在连接腾讯云WebSocketURL: {unsignedUrl}&signature=***");
        return url;
    }
    catch (Exception ex)
    {
        _logger.LogError($"构建腾讯云URL失败: {ex.Message}");
        throw;
    }
}
/// <summary>
/// Raises the <see cref="ResultReceived"/> event.
/// </summary>
/// <param name="result">Recognition result passed to the subscribers.</param>
protected virtual void OnResultReceived(SpeechToTextResult result)
{
    // Take a snapshot of the delegate so that an unsubscribe on another thread
    // cannot null it between the check and the call.
    EventHandler<SpeechToTextResult> subscribers = ResultReceived;
    if (subscribers != null)
    {
        subscribers(this, result);
    }
}
/// <summary>
/// Ends every open session (which also saves its collected text), releases any remaining
/// cancellation token sources and clears all session bookkeeping. Safe to call more than once.
/// </summary>
public void Dispose()
{
    if (_isDisposed)
        return;
    _isDisposed = true;

    foreach (var sessionId in _webSockets.Keys.ToList())
    {
        try
        {
            // EndSessionAsync saves the de-duplicated text before closing the socket, so no
            // separate save is needed here. GetAwaiter().GetResult() surfaces the original
            // exception rather than an AggregateException, which keeps the log message useful.
            EndSessionAsync(sessionId).GetAwaiter().GetResult();
        }
        catch (Exception ex)
        {
            _logger.LogWarning($"释放会话资源失败: {ex.Message}");
        }
    }

    // Cancel and dispose any token source that has no matching socket, instead of
    // just dropping the reference.
    foreach (var sessionId in _cancellationTokens.Keys.ToList())
    {
        if (_cancellationTokens.TryRemove(sessionId, out var leftover))
        {
            try
            {
                leftover.Cancel();
                leftover.Dispose();
            }
            catch (Exception ex)
            {
                _logger.LogWarning($"释放会话资源失败: {ex.Message}");
            }
        }
    }

    _webSockets.Clear();
    _cancellationTokens.Clear();
    _sessions.Clear();
}
/// <summary>
/// De-duplicates the given texts, joins them with newlines and saves the result for the session.
/// Failures are logged, never thrown.
/// </summary>
/// <param name="sessionId">Session ID.</param>
/// <param name="textList">Texts collected so far; nothing happens when null or empty.</param>
private void SaveLatestRecognitionText(string sessionId, List<string> textList)
{
    try
    {
        bool nothingToSave = textList == null || textList.Count == 0;
        if (nothingToSave)
        {
            return;
        }

        List<string> distinctTexts = RemoveDuplicateTexts(textList);
        SaveRecognitionText(sessionId, string.Join("\n", distinctTexts));
        _logger.LogInformation($"已保存会话 {sessionId} 的最新识别文本,共 {distinctTexts.Count} 句");
    }
    catch (Exception ex)
    {
        _logger.LogError($"保存最新识别文本失败: {ex.Message}");
    }
}
/// <summary>
/// Returns the recognized text of a session. Sources are tried in this order: the in-memory
/// recognized texts (de-duplicated), the pending text, then the .txt file next to the recording.
/// </summary>
/// <param name="sessionId">Session ID.</param>
/// <returns>The recognized text, or an empty string when the ID is empty or no text is found.</returns>
public string GetRecognizedText(string sessionId)
{
    if (string.IsNullOrEmpty(sessionId))
    {
        _logger.LogWarning("尝试获取识别文字时会话ID为空");
        return string.Empty;
    }

    if (_sessions.TryGetValue(sessionId, out var state))
    {
        // 1. Texts still held in memory.
        if (state.RecognizedTexts.Count > 0)
        {
            return string.Join("\n", RemoveDuplicateTexts(state.RecognizedTexts));
        }

        // 2. Text waiting to be written to disk.
        bool hasPending = state.HasPendingText && !string.IsNullOrEmpty(state.PendingText);
        if (hasPending)
        {
            return state.PendingText;
        }

        // 3. Text already written next to the recording file.
        string transcriptPath = string.IsNullOrEmpty(state.RecordingPath)
            ? null
            : Path.ChangeExtension(state.RecordingPath, ".txt");
        if (transcriptPath != null && File.Exists(transcriptPath))
        {
            try
            {
                return File.ReadAllText(transcriptPath);
            }
            catch (Exception ex)
            {
                _logger.LogError($"从文件读取识别文本失败: {ex.Message}");
            }
        }
    }

    _logger.LogWarning($"未找到会话 {sessionId} 的识别文本");
    return string.Empty;
}
/// <summary>
/// Returns the state of a session, refreshing its IsActive flag from the WebSocket state and
/// its HasSavedText flag from the presence of the .txt file next to the recording. When the
/// session is unknown, a new entry is created, stored and returned.
/// </summary>
/// <param name="sessionId">Session ID.</param>
/// <returns>
/// The session state; for a null or empty ID, a detached placeholder with Exists = false
/// that is not stored.
/// </returns>
public SpeechRecognitionSession GetSessionStatus(string sessionId)
{
    if (string.IsNullOrEmpty(sessionId))
    {
        _logger.LogWarning("尝试获取会话状态时会话ID为空");
        return new SpeechRecognitionSession
        {
            SessionId = string.Empty,
            Exists = false,
            IsActive = false,
            HasSavedText = false,
            HasPendingText = false,
            RecordingPath = string.Empty,
            IsSentToDisplay = false
        };
    }

    // A single TryGetValue instead of ContainsKey + indexer: the socket can be removed by
    // another thread between the two calls, which would make the indexer throw.
    bool isActive = _webSockets.TryGetValue(sessionId, out var socket) &&
        socket?.State == WebSocketState.Open;

    if (_sessions.TryGetValue(sessionId, out var session))
    {
        // Refresh the flags so the caller sees current state.
        session.IsActive = isActive;
        if (!string.IsNullOrEmpty(session.RecordingPath))
        {
            string textFilePath = Path.ChangeExtension(session.RecordingPath, ".txt");
            session.HasSavedText = File.Exists(textFilePath);
        }

        _sessions[sessionId] = session;
        return session;
    }

    // Unknown session: create and remember a new state entry.
    session = new SpeechRecognitionSession
    {
        SessionId = sessionId,
        Exists = isActive,
        IsActive = isActive,
        HasSavedText = false,
        HasPendingText = false,
        RecordingPath = string.Empty,
        IsSentToDisplay = false,
        CreatedTime = DateTime.Now,
        LastActivityTime = DateTime.Now
    };
    _sessions[sessionId] = session;
    return session;
}
/// <summary>
/// Records whether the session's result has been sent to the big-screen display.
/// The session entry is created when it does not exist yet.
/// </summary>
/// <param name="sessionId">Session ID; the call is ignored when null or empty.</param>
/// <param name="isSent">Whether the result has been sent to the display.</param>
public void SetSessionDisplayStatus(string sessionId, bool isSent)
{
    if (string.IsNullOrEmpty(sessionId))
        return;

    // GetSessionStatus returns the existing entry or creates one.
    var session = GetSessionStatus(sessionId);
    session.IsSentToDisplay = isSent;
    _sessions[sessionId] = session;

    // Both branches used to be empty strings, so the log never showed the value.
    _logger.LogInformation($"设置会话 {sessionId} 的大屏发送状态: {(isSent ? "是" : "否")}");
}
}
}