428 lines
14 KiB
C#
428 lines
14 KiB
C#
using System.Diagnostics;
|
||
using Microsoft.Extensions.Logging;
|
||
using Microsoft.Extensions.Options;
|
||
using PuppeteerSharp;
|
||
using PuppeteerSharp.Media;
|
||
using HtmlToPdfService.Core.Models;
|
||
using HtmlToPdfService.Core.Options;
|
||
using HtmlToPdfService.Core.Pool;
|
||
using HtmlToPdfService.Core.Storage;
|
||
|
||
namespace HtmlToPdfService.Core.Services;
|
||
|
||
/// <summary>
/// Puppeteer-based implementation of the PDF conversion service.
/// </summary>
|
||
public class PuppeteerPdfService : IPdfService
|
||
{
|
||
/// <summary>Service configuration, unwrapped from the injected options accessor.</summary>
private readonly PdfServiceOptions _options;

/// <summary>Logger used for conversion progress and failure messages.</summary>
private readonly ILogger<PuppeteerPdfService> _logger;

/// <summary>Pool that browser instances are acquired from and released back to.</summary>
private readonly IBrowserPool _browserPool;

/// <summary>Storage used to save generated PDF files.</summary>
private readonly IFileStorage _fileStorage;

/// <summary>Service used to send conversion callbacks.</summary>
private readonly ICallbackService _callbackService;

/// <summary>
/// Creates the service from its injected dependencies.
/// </summary>
/// <param name="options">Options accessor; its <c>Value</c> is read once and stored.</param>
/// <param name="logger">Logger for this service.</param>
/// <param name="browserPool">Browser pool used by the conversion methods.</param>
/// <param name="fileStorage">File storage used when a local copy is requested.</param>
/// <param name="callbackService">Callback sender used to report conversion outcomes.</param>
public PuppeteerPdfService(
    IOptions<PdfServiceOptions> options,
    ILogger<PuppeteerPdfService> logger,
    IBrowserPool browserPool,
    IFileStorage fileStorage,
    ICallbackService callbackService)
{
    // Single tuple assignment; elements are evaluated left to right, so options.Value is read first.
    (_options, _logger, _browserPool, _fileStorage, _callbackService) =
        (options.Value, logger, browserPool, fileStorage, callbackService);
}
|
||
|
||
/// <summary>
/// Converts an HTML string to a PDF document using a browser taken from the pool.
/// </summary>
/// <param name="html">HTML markup to render. Its UTF-8 byte count must not exceed <c>Conversion.MaxHtmlSize</c>.</param>
/// <param name="options">Request-level PDF options; when <c>null</c> the configured default PDF options are used.</param>
/// <param name="callbackUrl">Callback URL for this request; when <c>null</c> the configured default callback URL is used.</param>
/// <param name="callbackHeaders">Headers handed to the callback service together with the payload.</param>
/// <param name="includePdfInCallback">Whether the callback carries the PDF as Base64; when <c>null</c> the configured setting is used.</param>
/// <param name="saveLocal">Whether to save the PDF through the file storage; when <c>null</c> <c>Storage.SaveLocalCopy</c> is used.</param>
/// <param name="cancellationToken">Token passed to the browser pool acquisition and to the file save.</param>
/// <returns>
/// A successful <see cref="ConversionResult"/> with the PDF bytes, size and timing, plus the local file path
/// and download URL when a local copy was saved.
/// </returns>
/// <exception cref="ArgumentException">The HTML exceeds the configured maximum size.</exception>
/// <remarks>
/// Any exception is logged, reported through the failure callback (when a callback URL is in effect) and rethrown.
/// The page is closed and the browser is always released back to the pool.
/// </remarks>
public async Task<ConversionResult> ConvertHtmlToPdfAsync(
    string html,
    PdfOptions? options = null,
    string? callbackUrl = null,
    Dictionary<string, string>? callbackHeaders = null,
    bool? includePdfInCallback = null,
    bool? saveLocal = null,
    CancellationToken cancellationToken = default)
{
    var requestId = Guid.NewGuid().ToString();
    var startTime = DateTime.UtcNow;
    var stopwatch = Stopwatch.StartNew();

    _logger.LogInformation("开始转换 HTML to PDF, RequestId: {RequestId}", requestId);

    IBrowser? browser = null;
    IPage? page = null;

    try
    {
        // Validate the HTML size (in UTF-8 bytes) before acquiring a browser.
        var htmlSize = System.Text.Encoding.UTF8.GetByteCount(html);
        if (htmlSize > _options.Conversion.MaxHtmlSize)
        {
            throw new ArgumentException(
                $"HTML 内容过大: {htmlSize} bytes,最大允许: {_options.Conversion.MaxHtmlSize} bytes");
        }

        // Acquire a browser instance from the pool.
        browser = await _browserPool.AcquireAsync(cancellationToken);
        _logger.LogDebug("获取浏览器实例成功, RequestId: {RequestId}", requestId);

        // Open a new page.
        page = await browser.NewPageAsync();

        // Load the HTML content into the page.
        await page.SetContentAsync(html, new NavigationOptions
        {
            Timeout = _options.Conversion.DefaultTimeout,
            WaitUntil = ParseWaitUntil(_options.Conversion.DefaultWaitUntil)
        });

        // Render the PDF.
        var pdfOptions = BuildPdfOptions(options);
        var pdfData = await page.PdfDataAsync(pdfOptions);

        stopwatch.Stop();

        _logger.LogInformation("HTML 转换成功, RequestId: {RequestId}, 耗时: {Duration}ms, 大小: {Size} bytes",
            requestId, stopwatch.ElapsedMilliseconds, pdfData.Length);

        // Build the result.
        var result = new ConversionResult
        {
            RequestId = requestId,
            Success = true,
            PdfData = pdfData,
            FileSize = pdfData.Length,
            Duration = stopwatch.ElapsedMilliseconds,
            StartTime = startTime,
            CompleteTime = DateTime.UtcNow
        };

        // Save a local copy when requested (request-level flag wins over configuration).
        var shouldSaveLocal = saveLocal ?? _options.Storage.SaveLocalCopy;
        if (shouldSaveLocal)
        {
            var (filePath, downloadUrl) = await _fileStorage.SaveAsync(requestId, pdfData, cancellationToken);
            result.LocalFilePath = filePath;
            result.DownloadUrl = downloadUrl;
        }

        // Send the success callback.
        // NOTE(review): if this call throws, the catch block below reports the request as a
        // failed conversion even though the PDF was produced — confirm that is intended.
        await SendCallbackIfNeededAsync(
            result,
            "html",
            html,
            options,
            callbackUrl,
            callbackHeaders,
            includePdfInCallback);

        return result;
    }
    catch (Exception ex)
    {
        stopwatch.Stop();
        _logger.LogError(ex, "HTML 转换失败, RequestId: {RequestId}, 耗时: {Duration}ms",
            requestId, stopwatch.ElapsedMilliseconds);

        var result = new ConversionResult
        {
            RequestId = requestId,
            Success = false,
            Duration = stopwatch.ElapsedMilliseconds,
            StartTime = startTime,
            CompleteTime = DateTime.UtcNow,
            ErrorMessage = ex.Message,
            ExceptionDetails = ex.ToString()
        };

        // Send the failure callback. A failure while notifying must not replace the
        // original conversion error, so it is logged and the original is rethrown below.
        try
        {
            await SendCallbackIfNeededAsync(
                result,
                "html",
                html,
                options,
                callbackUrl,
                callbackHeaders,
                includePdfInCallback);
        }
        catch (Exception callbackEx)
        {
            _logger.LogError(callbackEx, "发送失败回调时出错, RequestId: {RequestId}", requestId);
        }

        throw;
    }
    finally
    {
        try
        {
            // Close the page; best-effort so a close failure cannot hide the real outcome.
            if (page is not null)
            {
                await page.CloseAsync();
            }
        }
        catch (Exception closeEx)
        {
            _logger.LogWarning(closeEx, "关闭页面失败, RequestId: {RequestId}", requestId);
        }
        finally
        {
            // Always return the browser to the pool, even when closing the page failed.
            if (browser is not null)
            {
                _browserPool.Release(browser);
            }
        }
    }
}
|
||
|
||
/// <summary>
/// Navigates to a URL and converts the loaded page to a PDF document using a browser taken from the pool.
/// </summary>
/// <param name="url">Absolute URL of the page to render.</param>
/// <param name="options">Request-level PDF options; when <c>null</c> the configured default PDF options are used.</param>
/// <param name="waitUntil">Navigation wait conditions; when <c>null</c> the configured <c>Conversion.DefaultWaitUntil</c> is used.</param>
/// <param name="timeout">Navigation timeout; when <c>null</c> the configured <c>Conversion.DefaultTimeout</c> is used.</param>
/// <param name="callbackUrl">Callback URL for this request; when <c>null</c> the configured default callback URL is used.</param>
/// <param name="callbackHeaders">Headers handed to the callback service together with the payload.</param>
/// <param name="includePdfInCallback">Whether the callback carries the PDF as Base64; when <c>null</c> the configured setting is used.</param>
/// <param name="saveLocal">Whether to save the PDF through the file storage; when <c>null</c> <c>Storage.SaveLocalCopy</c> is used.</param>
/// <param name="cancellationToken">Token passed to the browser pool acquisition and to the file save.</param>
/// <returns>
/// A successful <see cref="ConversionResult"/> with the PDF bytes, size and timing, plus the local file path
/// and download URL when a local copy was saved.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="url"/> is not a valid absolute URL.</exception>
/// <remarks>
/// Any exception is logged, reported through the failure callback (when a callback URL is in effect) and rethrown.
/// The page is closed and the browser is always released back to the pool.
/// </remarks>
public async Task<ConversionResult> ConvertUrlToPdfAsync(
    string url,
    PdfOptions? options = null,
    WaitUntilNavigation[]? waitUntil = null,
    int? timeout = null,
    string? callbackUrl = null,
    Dictionary<string, string>? callbackHeaders = null,
    bool? includePdfInCallback = null,
    bool? saveLocal = null,
    CancellationToken cancellationToken = default)
{
    var requestId = Guid.NewGuid().ToString();
    var startTime = DateTime.UtcNow;
    var stopwatch = Stopwatch.StartNew();

    _logger.LogInformation("开始转换 URL to PDF, RequestId: {RequestId}, URL: {Url}", requestId, url);

    IBrowser? browser = null;
    IPage? page = null;

    try
    {
        // Validate the URL; the parsed value itself is not needed.
        // NOTE(review): this check does not restrict the scheme, so e.g. file: URLs pass.
        // If callers are untrusted, consider allowing only http/https — confirm requirements.
        if (!Uri.TryCreate(url, UriKind.Absolute, out _))
        {
            throw new ArgumentException($"无效的 URL: {url}");
        }

        // Acquire a browser instance from the pool.
        browser = await _browserPool.AcquireAsync(cancellationToken);
        _logger.LogDebug("获取浏览器实例成功, RequestId: {RequestId}", requestId);

        // Open a new page.
        page = await browser.NewPageAsync();

        // Navigate to the URL (request-level values win over configuration).
        var navigationOptions = new NavigationOptions
        {
            Timeout = timeout ?? _options.Conversion.DefaultTimeout,
            WaitUntil = waitUntil ?? ParseWaitUntil(_options.Conversion.DefaultWaitUntil)
        };

        await page.GoToAsync(url, navigationOptions);

        // Render the PDF.
        var pdfOptions = BuildPdfOptions(options);
        var pdfData = await page.PdfDataAsync(pdfOptions);

        stopwatch.Stop();

        _logger.LogInformation("URL 转换成功, RequestId: {RequestId}, 耗时: {Duration}ms, 大小: {Size} bytes",
            requestId, stopwatch.ElapsedMilliseconds, pdfData.Length);

        // Build the result.
        var result = new ConversionResult
        {
            RequestId = requestId,
            Success = true,
            PdfData = pdfData,
            FileSize = pdfData.Length,
            Duration = stopwatch.ElapsedMilliseconds,
            StartTime = startTime,
            CompleteTime = DateTime.UtcNow
        };

        // Save a local copy when requested (request-level flag wins over configuration).
        var shouldSaveLocal = saveLocal ?? _options.Storage.SaveLocalCopy;
        if (shouldSaveLocal)
        {
            var (filePath, downloadUrl) = await _fileStorage.SaveAsync(requestId, pdfData, cancellationToken);
            result.LocalFilePath = filePath;
            result.DownloadUrl = downloadUrl;
        }

        // Send the success callback.
        // NOTE(review): if this call throws, the catch block below reports the request as a
        // failed conversion even though the PDF was produced — confirm that is intended.
        await SendCallbackIfNeededAsync(
            result,
            "url",
            url,
            options,
            callbackUrl,
            callbackHeaders,
            includePdfInCallback);

        return result;
    }
    catch (Exception ex)
    {
        stopwatch.Stop();
        _logger.LogError(ex, "URL 转换失败, RequestId: {RequestId}, URL: {Url}, 耗时: {Duration}ms",
            requestId, url, stopwatch.ElapsedMilliseconds);

        var result = new ConversionResult
        {
            RequestId = requestId,
            Success = false,
            Duration = stopwatch.ElapsedMilliseconds,
            StartTime = startTime,
            CompleteTime = DateTime.UtcNow,
            ErrorMessage = ex.Message,
            ExceptionDetails = ex.ToString()
        };

        // Send the failure callback. A failure while notifying must not replace the
        // original conversion error, so it is logged and the original is rethrown below.
        try
        {
            await SendCallbackIfNeededAsync(
                result,
                "url",
                url,
                options,
                callbackUrl,
                callbackHeaders,
                includePdfInCallback);
        }
        catch (Exception callbackEx)
        {
            _logger.LogError(callbackEx, "发送失败回调时出错, RequestId: {RequestId}", requestId);
        }

        throw;
    }
    finally
    {
        try
        {
            // Close the page; best-effort so a close failure cannot hide the real outcome.
            if (page is not null)
            {
                await page.CloseAsync();
            }
        }
        catch (Exception closeEx)
        {
            _logger.LogWarning(closeEx, "关闭页面失败, RequestId: {RequestId}", requestId);
        }
        finally
        {
            // Always return the browser to the pool, even when closing the page failed.
            if (browser is not null)
            {
                _browserPool.Release(browser);
            }
        }
    }
}
|
||
|
||
/// <summary>
/// Builds the PDF options for one conversion by overlaying request-level options on the configured defaults.
/// </summary>
/// <param name="customOptions">Request-level options, or <c>null</c> to use only the configured defaults.</param>
/// <returns>
/// A new options object carrying format, orientation, background printing, CSS page size preference and
/// margins. No other setting of <paramref name="customOptions"/> is copied.
/// </returns>
private PdfOptions BuildPdfOptions(PdfOptions? customOptions)
{
    var fallback = _options.DefaultPdfOptions;

    // Each setting prefers the request value and otherwise uses the configured default.
    var paperFormat = customOptions?.Format ?? ParsePaperFormat(fallback.Format);
    var landscape = customOptions?.Landscape ?? fallback.Landscape;
    var printBackground = customOptions?.PrintBackground ?? fallback.PrintBackground;
    var preferCssPageSize = customOptions?.PreferCSSPageSize ?? fallback.PreferCSSPageSize;

    // Default margins are only materialized when the request did not supply any.
    var margins = customOptions?.MarginOptions;
    if (margins is null)
    {
        margins = new PuppeteerSharp.Media.MarginOptions
        {
            Top = fallback.Margin.Top,
            Right = fallback.Margin.Right,
            Bottom = fallback.Margin.Bottom,
            Left = fallback.Margin.Left
        };
    }

    return new PdfOptions
    {
        Format = paperFormat,
        Landscape = landscape,
        PrintBackground = printBackground,
        PreferCSSPageSize = preferCssPageSize,
        MarginOptions = margins
    };
}
|
||
|
||
/// <summary>
/// Maps a paper format name to the matching <see cref="PaperFormat"/> preset.
/// </summary>
/// <param name="format">Format name; matched case-insensitively after trimming surrounding whitespace.</param>
/// <returns>The matching preset, or A4 when the name is <c>null</c>, empty or not recognized.</returns>
private PaperFormat ParsePaperFormat(string format)
{
    // Null-conditional access: a missing value takes the A4 fallback instead of throwing.
    return format?.Trim().ToUpperInvariant() switch
    {
        "A0" => PaperFormat.A0,
        "A1" => PaperFormat.A1,
        "A2" => PaperFormat.A2,
        "A3" => PaperFormat.A3,
        "A4" => PaperFormat.A4,
        "A5" => PaperFormat.A5,
        "A6" => PaperFormat.A6,
        "LETTER" => PaperFormat.Letter,
        "LEGAL" => PaperFormat.Legal,
        "TABLOID" => PaperFormat.Tabloid,
        "LEDGER" => PaperFormat.Ledger,
        _ => PaperFormat.A4
    };
}
|
||
|
||
/// <summary>
/// Maps a wait-condition name to a single-element navigation wait array.
/// </summary>
/// <param name="waitUntil">Condition name; compared after lower-casing with the invariant culture.</param>
/// <returns>
/// A new array with the matching condition; <c>Networkidle2</c> when the name is not recognized.
/// </returns>
private WaitUntilNavigation[] ParseWaitUntil(string waitUntil)
{
    WaitUntilNavigation condition;

    switch (waitUntil.ToLowerInvariant())
    {
        case "load":
            condition = WaitUntilNavigation.Load;
            break;
        case "domcontentloaded":
            condition = WaitUntilNavigation.DOMContentLoaded;
            break;
        case "networkidle0":
            condition = WaitUntilNavigation.Networkidle0;
            break;
        default:
            // Covers "networkidle2" as well as any unrecognized name.
            condition = WaitUntilNavigation.Networkidle2;
            break;
    }

    return new[] { condition };
}
|
||
|
||
/// <summary>
/// Sends a conversion callback when a callback URL is in effect; otherwise does nothing.
/// </summary>
/// <param name="result">Conversion outcome the payload is built from.</param>
/// <param name="sourceType">Source kind written to the payload (the callers pass "html" or "url").</param>
/// <param name="sourceContent">Source content written to the payload.</param>
/// <param name="options">Request options written to the payload as-is.</param>
/// <param name="callbackUrl">Request-level callback URL; the configured default is used when <c>null</c>.</param>
/// <param name="callbackHeaders">Headers handed to the callback service.</param>
/// <param name="includePdfInCallback">Whether to embed the PDF as Base64; the configured setting is used when <c>null</c>.</param>
private async Task SendCallbackIfNeededAsync(
    ConversionResult result,
    string sourceType,
    string sourceContent,
    object? options,
    string? callbackUrl,
    Dictionary<string, string>? callbackHeaders,
    bool? includePdfInCallback)
{
    // The request-level URL takes precedence over the global configuration.
    var targetUrl = callbackUrl ?? _options.Callback.DefaultUrl;
    if (string.IsNullOrEmpty(targetUrl))
    {
        return;
    }

    // Decide whether the PDF bytes are embedded in the payload.
    var embedPdf = includePdfInCallback ?? _options.Callback.IncludePdfData;

    var source = new CallbackSource
    {
        Type = sourceType,
        Content = sourceContent,
        Options = options
    };

    var payload = new CallbackPayload
    {
        RequestId = result.RequestId,
        Status = result.Success ? "success" : "failed",
        Timestamp = DateTime.UtcNow,
        Duration = result.Duration,
        Source = source
    };

    if (!result.Success)
    {
        // Failure: attach the error description.
        payload.Error = new CallbackError
        {
            Code = "CONVERSION_FAILED",
            Message = result.ErrorMessage ?? "未知错误",
            Details = result.ExceptionDetails
        };
    }
    else
    {
        // Success: attach file details; the expiry is only set when a download URL exists.
        payload.Result = new CallbackResult
        {
            FileSize = result.FileSize,
            DownloadUrl = result.DownloadUrl,
            PdfBase64 = embedPdf && result.PdfData is not null
                ? Convert.ToBase64String(result.PdfData)
                : null,
            ExpiresAt = result.DownloadUrl is not null
                ? DateTime.UtcNow.AddHours(_options.Storage.RetentionHours)
                : null
        };
    }

    // Hand the payload to the callback service.
    await _callbackService.SendCallbackAsync(targetUrl, payload, callbackHeaders);
}
|
||
}
|
||
|