HtmlToPdf/mvp/HtmlToPdfService.Core/Services/PuppeteerPdfService.cs
2025-12-11 23:35:52 +08:00

428 lines
14 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System.Diagnostics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using PuppeteerSharp;
using PuppeteerSharp.Media;
using HtmlToPdfService.Core.Models;
using HtmlToPdfService.Core.Options;
using HtmlToPdfService.Core.Pool;
using HtmlToPdfService.Core.Storage;
namespace HtmlToPdfService.Core.Services;
/// <summary>
/// <see cref="IPdfService"/> implementation that renders HTML content or a URL to PDF
/// through PuppeteerSharp, using browser instances borrowed from an <see cref="IBrowserPool"/>.
/// </summary>
public class PuppeteerPdfService : IPdfService
{
    private readonly PdfServiceOptions _options;
    private readonly ILogger<PuppeteerPdfService> _logger;
    private readonly IBrowserPool _browserPool;
    private readonly IFileStorage _fileStorage;
    private readonly ICallbackService _callbackService;

    /// <summary>
    /// Creates the service from its injected dependencies.
    /// </summary>
    /// <param name="options">Service configuration; the unwrapped <c>Value</c> is stored.</param>
    /// <param name="logger">Logger used for conversion progress and failures.</param>
    /// <param name="browserPool">Pool that browsers are acquired from and released to.</param>
    /// <param name="fileStorage">Storage used when a local copy of the PDF is requested.</param>
    /// <param name="callbackService">Service used to deliver result callbacks.</param>
    public PuppeteerPdfService(
        IOptions<PdfServiceOptions> options,
        ILogger<PuppeteerPdfService> logger,
        IBrowserPool browserPool,
        IFileStorage fileStorage,
        ICallbackService callbackService)
    {
        _options = options.Value;
        _logger = logger;
        _browserPool = browserPool;
        _fileStorage = fileStorage;
        _callbackService = callbackService;
    }

    /// <summary>
    /// Converts an HTML string to a PDF.
    /// </summary>
    /// <param name="html">HTML markup to render.</param>
    /// <param name="options">Per-request PDF options; configured defaults fill in when absent.</param>
    /// <param name="callbackUrl">Per-request callback URL; falls back to the configured default URL when null.</param>
    /// <param name="callbackHeaders">Extra headers handed to the callback service.</param>
    /// <param name="includePdfInCallback">Whether to embed the PDF (Base64) in the callback; falls back to configuration when null.</param>
    /// <param name="saveLocal">Whether to store the PDF through the file storage; falls back to configuration when null.</param>
    /// <param name="cancellationToken">Token passed to browser acquisition and file storage.</param>
    /// <returns>A successful <see cref="ConversionResult"/> carrying the PDF bytes.</returns>
    /// <exception cref="ArgumentException">The UTF-8 size of <paramref name="html"/> exceeds the configured maximum.</exception>
    /// <remarks>
    /// Any exception raised during conversion is logged, reported through a best-effort failure
    /// callback, and then rethrown unchanged.
    /// </remarks>
    public async Task<ConversionResult> ConvertHtmlToPdfAsync(
        string html,
        PdfOptions? options = null,
        string? callbackUrl = null,
        Dictionary<string, string>? callbackHeaders = null,
        bool? includePdfInCallback = null,
        bool? saveLocal = null,
        CancellationToken cancellationToken = default)
    {
        var requestId = Guid.NewGuid().ToString();
        var startTime = DateTime.UtcNow;
        var stopwatch = Stopwatch.StartNew();
        _logger.LogInformation("开始转换 HTML to PDF, RequestId: {RequestId}", requestId);

        IBrowser? browser = null;
        IPage? page = null;
        try
        {
            // Reject oversized input before a browser is taken from the pool.
            var htmlSize = System.Text.Encoding.UTF8.GetByteCount(html);
            if (htmlSize > _options.Conversion.MaxHtmlSize)
            {
                throw new ArgumentException(
                    $"HTML 内容过大: {htmlSize} bytes，最大允许: {_options.Conversion.MaxHtmlSize} bytes");
            }

            browser = await _browserPool.AcquireAsync(cancellationToken);
            _logger.LogDebug("获取浏览器实例成功, RequestId: {RequestId}", requestId);

            page = await browser.NewPageAsync();
            await page.SetContentAsync(html, new NavigationOptions
            {
                Timeout = _options.Conversion.DefaultTimeout,
                WaitUntil = ParseWaitUntil(_options.Conversion.DefaultWaitUntil)
            });

            var pdfData = await page.PdfDataAsync(BuildPdfOptions(options));
            stopwatch.Stop();
            _logger.LogInformation("HTML 转换成功, RequestId: {RequestId}, 耗时: {Duration}ms, 大小: {Size} bytes",
                requestId, stopwatch.ElapsedMilliseconds, pdfData.Length);

            var result = CreateSuccessResult(requestId, pdfData, stopwatch.ElapsedMilliseconds, startTime);
            await SaveLocalCopyIfNeededAsync(result, requestId, pdfData, saveLocal, cancellationToken);

            // If saving or this callback throws, control falls into the catch block below,
            // which logs the failure and attempts a failure callback before rethrowing.
            await SendCallbackIfNeededAsync(
                result, "html", html, options, callbackUrl, callbackHeaders, includePdfInCallback);
            return result;
        }
        catch (Exception ex)
        {
            stopwatch.Stop();
            _logger.LogError(ex, "HTML 转换失败, RequestId: {RequestId}, 耗时: {Duration}ms",
                requestId, stopwatch.ElapsedMilliseconds);

            var failure = CreateFailureResult(requestId, ex, stopwatch.ElapsedMilliseconds, startTime);
            await TrySendFailureCallbackAsync(
                failure, "html", html, options, callbackUrl, callbackHeaders, includePdfInCallback);
            throw;
        }
        finally
        {
            await ReleaseResourcesAsync(page, browser, requestId);
        }
    }

    /// <summary>
    /// Navigates to a URL and converts the loaded page to a PDF.
    /// </summary>
    /// <param name="url">Absolute URL to load.</param>
    /// <param name="options">Per-request PDF options; configured defaults fill in when absent.</param>
    /// <param name="waitUntil">Navigation wait conditions; falls back to the configured default when null.</param>
    /// <param name="timeout">Navigation timeout; falls back to the configured default when null.</param>
    /// <param name="callbackUrl">Per-request callback URL; falls back to the configured default URL when null.</param>
    /// <param name="callbackHeaders">Extra headers handed to the callback service.</param>
    /// <param name="includePdfInCallback">Whether to embed the PDF (Base64) in the callback; falls back to configuration when null.</param>
    /// <param name="saveLocal">Whether to store the PDF through the file storage; falls back to configuration when null.</param>
    /// <param name="cancellationToken">Token passed to browser acquisition and file storage.</param>
    /// <returns>A successful <see cref="ConversionResult"/> carrying the PDF bytes.</returns>
    /// <exception cref="ArgumentException"><paramref name="url"/> is not a valid absolute URI.</exception>
    /// <remarks>
    /// Any exception raised during conversion is logged, reported through a best-effort failure
    /// callback, and then rethrown unchanged.
    /// </remarks>
    public async Task<ConversionResult> ConvertUrlToPdfAsync(
        string url,
        PdfOptions? options = null,
        WaitUntilNavigation[]? waitUntil = null,
        int? timeout = null,
        string? callbackUrl = null,
        Dictionary<string, string>? callbackHeaders = null,
        bool? includePdfInCallback = null,
        bool? saveLocal = null,
        CancellationToken cancellationToken = default)
    {
        var requestId = Guid.NewGuid().ToString();
        var startTime = DateTime.UtcNow;
        var stopwatch = Stopwatch.StartNew();
        _logger.LogInformation("开始转换 URL to PDF, RequestId: {RequestId}, URL: {Url}", requestId, url);

        IBrowser? browser = null;
        IPage? page = null;
        try
        {
            // NOTE(review): any absolute URI passes this check, including non-HTTP schemes
            // such as file:// — consider restricting schemes if url comes from untrusted callers.
            if (!Uri.TryCreate(url, UriKind.Absolute, out _))
            {
                throw new ArgumentException($"无效的 URL: {url}");
            }

            browser = await _browserPool.AcquireAsync(cancellationToken);
            _logger.LogDebug("获取浏览器实例成功, RequestId: {RequestId}", requestId);

            page = await browser.NewPageAsync();
            var navigationOptions = new NavigationOptions
            {
                Timeout = timeout ?? _options.Conversion.DefaultTimeout,
                WaitUntil = waitUntil ?? ParseWaitUntil(_options.Conversion.DefaultWaitUntil)
            };
            await page.GoToAsync(url, navigationOptions);

            var pdfData = await page.PdfDataAsync(BuildPdfOptions(options));
            stopwatch.Stop();
            _logger.LogInformation("URL 转换成功, RequestId: {RequestId}, 耗时: {Duration}ms, 大小: {Size} bytes",
                requestId, stopwatch.ElapsedMilliseconds, pdfData.Length);

            var result = CreateSuccessResult(requestId, pdfData, stopwatch.ElapsedMilliseconds, startTime);
            await SaveLocalCopyIfNeededAsync(result, requestId, pdfData, saveLocal, cancellationToken);

            // If saving or this callback throws, control falls into the catch block below,
            // which logs the failure and attempts a failure callback before rethrowing.
            await SendCallbackIfNeededAsync(
                result, "url", url, options, callbackUrl, callbackHeaders, includePdfInCallback);
            return result;
        }
        catch (Exception ex)
        {
            stopwatch.Stop();
            _logger.LogError(ex, "URL 转换失败, RequestId: {RequestId}, URL: {Url}, 耗时: {Duration}ms",
                requestId, url, stopwatch.ElapsedMilliseconds);

            var failure = CreateFailureResult(requestId, ex, stopwatch.ElapsedMilliseconds, startTime);
            await TrySendFailureCallbackAsync(
                failure, "url", url, options, callbackUrl, callbackHeaders, includePdfInCallback);
            throw;
        }
        finally
        {
            await ReleaseResourcesAsync(page, browser, requestId);
        }
    }

    /// <summary>
    /// Builds the result object for a completed conversion.
    /// </summary>
    private static ConversionResult CreateSuccessResult(
        string requestId,
        byte[] pdfData,
        long durationMs,
        DateTime startTime)
    {
        return new ConversionResult
        {
            RequestId = requestId,
            Success = true,
            PdfData = pdfData,
            FileSize = pdfData.Length,
            Duration = durationMs,
            StartTime = startTime,
            CompleteTime = DateTime.UtcNow
        };
    }

    /// <summary>
    /// Builds the result object describing a failed conversion.
    /// </summary>
    private static ConversionResult CreateFailureResult(
        string requestId,
        Exception exception,
        long durationMs,
        DateTime startTime)
    {
        return new ConversionResult
        {
            RequestId = requestId,
            Success = false,
            Duration = durationMs,
            StartTime = startTime,
            CompleteTime = DateTime.UtcNow,
            ErrorMessage = exception.Message,
            ExceptionDetails = exception.ToString()
        };
    }

    /// <summary>
    /// Stores the PDF through the file storage when requested (request-level flag first,
    /// configured default otherwise) and records the returned path and download URL on the result.
    /// </summary>
    private async Task SaveLocalCopyIfNeededAsync(
        ConversionResult result,
        string requestId,
        byte[] pdfData,
        bool? saveLocal,
        CancellationToken cancellationToken)
    {
        var shouldSaveLocal = saveLocal ?? _options.Storage.SaveLocalCopy;
        if (!shouldSaveLocal)
        {
            return;
        }

        var (filePath, downloadUrl) = await _fileStorage.SaveAsync(requestId, pdfData, cancellationToken);
        result.LocalFilePath = filePath;
        result.DownloadUrl = downloadUrl;
    }

    /// <summary>
    /// Sends the failure callback without letting a callback error escape, so the caller's
    /// <c>throw;</c> still rethrows the original conversion exception.
    /// </summary>
    private async Task TrySendFailureCallbackAsync(
        ConversionResult failure,
        string sourceType,
        string sourceContent,
        object? options,
        string? callbackUrl,
        Dictionary<string, string>? callbackHeaders,
        bool? includePdfInCallback)
    {
        try
        {
            await SendCallbackIfNeededAsync(
                failure, sourceType, sourceContent, options, callbackUrl, callbackHeaders, includePdfInCallback);
        }
        catch (Exception callbackException)
        {
            _logger.LogError(callbackException, "发送失败回调时出错, RequestId: {RequestId}", failure.RequestId);
        }
    }

    /// <summary>
    /// Closes the page (if one was opened) and always hands the browser back to the pool.
    /// A failure while closing the page is logged instead of thrown, so it can neither skip
    /// the release nor replace an exception that is already propagating.
    /// </summary>
    private async Task ReleaseResourcesAsync(IPage? page, IBrowser? browser, string requestId)
    {
        try
        {
            if (page is not null)
            {
                await page.CloseAsync();
            }
        }
        catch (Exception closeException)
        {
            _logger.LogWarning(closeException, "关闭页面失败, RequestId: {RequestId}", requestId);
        }
        finally
        {
            if (browser is not null)
            {
                _browserPool.Release(browser);
            }
        }
    }

    /// <summary>
    /// Merges per-request PDF options with the configured defaults.
    /// </summary>
    /// <param name="customOptions">Options supplied by the caller, or null to use defaults only.</param>
    /// <returns>A new options object; <paramref name="customOptions"/> is not modified.</returns>
    /// <remarks>
    /// Only Format, Landscape, PrintBackground, PreferCSSPageSize and MarginOptions are carried
    /// over; other members of <paramref name="customOptions"/> are not copied.
    /// </remarks>
    private PdfOptions BuildPdfOptions(PdfOptions? customOptions)
    {
        var defaultOptions = _options.DefaultPdfOptions;
        return new PdfOptions
        {
            Format = customOptions?.Format ?? ParsePaperFormat(defaultOptions.Format),
            Landscape = customOptions?.Landscape ?? defaultOptions.Landscape,
            PrintBackground = customOptions?.PrintBackground ?? defaultOptions.PrintBackground,
            PreferCSSPageSize = customOptions?.PreferCSSPageSize ?? defaultOptions.PreferCSSPageSize,
            MarginOptions = customOptions?.MarginOptions ?? new PuppeteerSharp.Media.MarginOptions
            {
                Top = defaultOptions.Margin.Top,
                Right = defaultOptions.Margin.Right,
                Bottom = defaultOptions.Margin.Bottom,
                Left = defaultOptions.Margin.Left
            }
        };
    }

    /// <summary>
    /// Maps a paper format name (case-insensitive) to a <see cref="PaperFormat"/>.
    /// Null or unrecognized names fall back to A4.
    /// </summary>
    private static PaperFormat ParsePaperFormat(string? format)
    {
        return format?.ToUpperInvariant() switch
        {
            "A3" => PaperFormat.A3,
            "A4" => PaperFormat.A4,
            "A5" => PaperFormat.A5,
            "LETTER" => PaperFormat.Letter,
            "LEGAL" => PaperFormat.Legal,
            "TABLOID" => PaperFormat.Tabloid,
            _ => PaperFormat.A4
        };
    }

    /// <summary>
    /// Maps a wait-condition name (case-insensitive) to a single-element
    /// <see cref="WaitUntilNavigation"/> array. Null or unrecognized names fall back to Networkidle2.
    /// </summary>
    private static WaitUntilNavigation[] ParseWaitUntil(string? waitUntil)
    {
        var condition = waitUntil?.ToLowerInvariant() switch
        {
            "load" => WaitUntilNavigation.Load,
            "domcontentloaded" => WaitUntilNavigation.DOMContentLoaded,
            "networkidle0" => WaitUntilNavigation.Networkidle0,
            "networkidle2" => WaitUntilNavigation.Networkidle2,
            _ => WaitUntilNavigation.Networkidle2
        };
        return new[] { condition };
    }

    /// <summary>
    /// Sends a result callback when a callback URL is available.
    /// </summary>
    /// <param name="result">Conversion outcome to report.</param>
    /// <param name="sourceType">Source kind placed in the payload ("html" or "url" at the call sites in this class).</param>
    /// <param name="sourceContent">The HTML or URL that was converted.</param>
    /// <param name="options">The caller's PDF options, passed through to the payload as-is.</param>
    /// <param name="callbackUrl">Per-request callback URL; the configured default is used only when this is null.</param>
    /// <param name="callbackHeaders">Extra headers handed to the callback service.</param>
    /// <param name="includePdfInCallback">Whether to embed the PDF as Base64; falls back to configuration when null.</param>
    private async Task SendCallbackIfNeededAsync(
        ConversionResult result,
        string sourceType,
        string sourceContent,
        object? options,
        string? callbackUrl,
        Dictionary<string, string>? callbackHeaders,
        bool? includePdfInCallback)
    {
        // The request-level URL takes precedence over the global default. Because of `??`,
        // an empty (non-null) request URL does not fall back and results in no callback.
        var effectiveCallbackUrl = callbackUrl ?? _options.Callback.DefaultUrl;
        if (string.IsNullOrEmpty(effectiveCallbackUrl))
        {
            return;
        }

        var shouldIncludePdf = includePdfInCallback ?? _options.Callback.IncludePdfData;

        var payload = new CallbackPayload
        {
            RequestId = result.RequestId,
            Status = result.Success ? "success" : "failed",
            Timestamp = DateTime.UtcNow,
            Duration = result.Duration,
            Source = new CallbackSource
            {
                Type = sourceType,
                Content = sourceContent,
                Options = options
            }
        };

        if (result.Success)
        {
            payload.Result = new CallbackResult
            {
                FileSize = result.FileSize,
                DownloadUrl = result.DownloadUrl,
                PdfBase64 = shouldIncludePdf && result.PdfData != null
                    ? Convert.ToBase64String(result.PdfData)
                    : null,
                // An expiry is reported only when a download URL exists.
                ExpiresAt = result.DownloadUrl != null
                    ? DateTime.UtcNow.AddHours(_options.Storage.RetentionHours)
                    : null
            };
        }
        else
        {
            payload.Error = new CallbackError
            {
                Code = "CONVERSION_FAILED",
                Message = result.ErrorMessage ?? "未知错误",
                Details = result.ExceptionDetails
            };
        }

        await _callbackService.SendCallbackAsync(
            effectiveCallbackUrl,
            payload,
            callbackHeaders);
    }
}