live-forum/server/crawler/commerce/tools/capture_params.py
2026-03-24 11:27:37 +08:00

200 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

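"""
Parameter capture tool.

Uses Playwright to listen to a page's network requests and capture the
signature-verification parameters sent to an API.
"""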
import json
import re
from urllib.parse import urlparse, parse_qs
from playwright.sync_api import sync_playwright
def extract_params_from_url(url: str) -> dict:
    """Extract every query-string parameter from ``url``.

    Args:
        url: URL whose query string should be parsed.

    Returns:
        A dict mapping each parameter name to its value. A parameter that
        occurs once maps to a plain string; a repeated parameter maps to the
        list of its values in order of appearance. Parameters with an empty
        value (``?a=``) are kept and map to ``""``.
    """
    # keep_blank_values=True: parse_qs drops empty-valued parameters by
    # default, which would hide them from the capture report.
    query = parse_qs(urlparse(url).query, keep_blank_values=True)
    # parse_qs always yields lists; unwrap the single-valued ones.
    return {
        name: values[0] if len(values) == 1 else values
        for name, values in query.items()
    }
def _shorten(value, limit: int = 60) -> str:
    """Return ``str(value)``, cut to ``limit`` characters plus "..." when longer."""
    text = str(value)
    return text if len(text) <= limit else text[:limit] + "..."


def analyze_request(url: str, headers: dict, post_data: str = None) -> None:
    """Print a human-readable report of one captured API request.

    The report has up to five sections: the URL without its query string,
    the signature parameters (``verifyFp``, ``fp``, ``msToken``, ``a_bogus``)
    that are present, all remaining query parameters, a fixed set of request
    headers, and the request body when there is one.

    Args:
        url: Full request URL including the query string.
        headers: Request headers as a name -> value mapping.
        post_data: Raw request body, or ``None``/empty when the request has
            no body. Printed as indented JSON when it parses as JSON,
            otherwise printed raw and truncated to 200 characters.
    """
    print("\n" + "="*80)
    print("🔍 捕获到API请求")
    print("="*80)

    # Show the endpoint only; the query string is broken down below.
    parsed_url = urlparse(url)
    print(f"\n📍 URL: {parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}")

    params = extract_params_from_url(url)

    # Signature parameters, with 'N/A' marking the ones this request lacks.
    sign_params = {
        'verifyFp': params.get('verifyFp', 'N/A'),
        'fp': params.get('fp', 'N/A'),
        'msToken': params.get('msToken', 'N/A'),
        'a_bogus': params.get('a_bogus', 'N/A'),
    }
    print("\n🔑 关键验签参数:")
    for name, value in sign_params.items():
        if value != 'N/A':
            # A repeated parameter arrives as a list, so stringify before
            # measuring/truncating instead of slicing the raw value.
            print(f" {name:12} = {_shorten(value)}")

    # Everything that is not a signature parameter is a business parameter.
    business_params = {
        name: value for name, value in params.items() if name not in sign_params
    }
    if business_params:
        print("\n📋 业务参数:")
        for name, value in business_params.items():
            print(f" {name:20} = {value}")

    # Header names are case-insensitive; prefer an exact match and fall back
    # to a lower-cased lookup so any capitalisation is found.
    lowered_headers = {str(name).lower(): value for name, value in headers.items()}
    important_headers = ['cookie', 'user-agent', 'referer', 'origin']
    print("\n📨 关键请求头:")
    for name in important_headers:
        value = headers.get(name, lowered_headers.get(name, 'N/A'))
        if value != 'N/A':
            print(f" {name:15} = {_shorten(value)}")

    if post_data:
        print("\n📦 POST数据:")
        try:
            payload = json.loads(post_data)
        except (TypeError, ValueError):
            # Not JSON (json.JSONDecodeError is a ValueError): show the raw
            # body instead. A bare ``except`` here would also swallow the
            # Ctrl+C used to stop the capture session.
            print(_shorten(post_data, 200))
        else:
            print(json.dumps(payload, indent=2, ensure_ascii=False))

    print("\n" + "="*80)
def capture_api_requests(target_url: str, api_pattern: str = "getInstituteDarenData"):
    """Open a visible Chromium window and report requests matching ``api_pattern``.

    Every request whose URL contains ``api_pattern`` is passed to
    ``analyze_request``; for the matching responses the HTTP status and a
    summary of the JSON structure are printed. The session lasts until the
    user presses Ctrl+C, the browser is closed, or one hour has elapsed, and
    ends with a short capture summary.

    Args:
        target_url: URL of the page to open.
        api_pattern: Substring identifying the API requests to capture.
    """
    # Function-scope import: the file's top-level imports only bring in
    # ``sync_playwright``; these error types come from the same package.
    from playwright.sync_api import Error as PlaywrightError
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    print("\n🚀 启动参数捕获工具")
    print(f"📱 目标页面: {target_url}")
    print(f"🎯 监听接口: {api_pattern}")
    print("\n⏳ 正在启动浏览器...")

    captured_count = 0

    def handle_request(request):
        """Report a request when its URL contains the watched pattern."""
        nonlocal captured_count
        url = request.url
        if api_pattern in url:
            captured_count += 1
            analyze_request(url, request.headers, request.post_data)
            print(f"\n✅ 已捕获 {captured_count} 个请求")
            print("💡 提示: 继续操作页面可以捕获更多请求...")

    def handle_response(response):
        """Print status and a structural preview for a watched response."""
        if api_pattern not in response.url:
            return
        print(f"\n📥 响应状态: {response.status}")
        try:
            data = response.json()
            print("📊 响应数据预览:")
            # Show the structure only, never the full payload.
            if isinstance(data, dict):
                print(f" - 响应字段: {list(data.keys())}")
                if 'data' in data:
                    if isinstance(data['data'], list):
                        print(f" - 数据数量: {len(data['data'])}")
                    elif isinstance(data['data'], dict):
                        print(f" - 数据字段: {list(data['data'].keys())}")
        except Exception as exc:
            # Best effort: a non-JSON or unavailable body must not stop the capture.
            print(f" ⚠️ 无法解析响应: {exc}")

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,
            args=[
                '--start-maximized',
                '--disable-blink-features=AutomationControlled',
            ],
        )
        # One try/finally around the whole session so Ctrl+C is handled and
        # the browser is closed during setup and navigation too, not only
        # during the final wait.
        try:
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            )
            page = context.new_page()
            page.on('request', handle_request)
            page.on('response', handle_response)

            print("\n🌐 正在访问页面...")
            try:
                page.goto(target_url, wait_until='networkidle', timeout=60000)
            except PlaywrightTimeoutError:
                # Pages that keep polling never become network-idle; the page
                # itself is still usable, so keep listening instead of aborting.
                print("\n⚠️ 页面加载超时,继续监听请求...")
            else:
                print("\n✨ 页面已加载完成")
            print("💡 请在浏览器中进行操作如登录、点击等工具会自动捕获API请求")
            print("⌨️ 按 Ctrl+C 停止捕获\n")

            # Keep the session alive for up to one hour.
            try:
                page.wait_for_timeout(3600000)
            except PlaywrightError as exc:
                # Raised when the user closes the page/browser while waiting;
                # treat that as a normal end of the session.
                print(f"\n⚠️ 浏览器已关闭或连接中断: {exc}")
        except KeyboardInterrupt:
            print("\n\n👋 停止捕获")
        finally:
            browser.close()

    if captured_count > 0:
        print("\n📊 捕获统计:")
        print(f" - 总共捕获: {captured_count} 个请求")
        print(f" - 接口名称: {api_pattern}")
    else:
        print("\n⚠️ 未捕获到任何请求")
        print("💡 提示:")
        print(" 1. 确保在浏览器中触发了相关操作")
        print(" 2. 检查接口名称是否正确")
def main():
    """Script entry point: resolve the CLI arguments and start capturing.

    The first positional argument overrides the page URL and the second
    overrides the API name pattern; each falls back to its built-in default
    when omitted.
    """
    import sys

    fallback_url = "https://buyin.jinritemai.com/mpa/account/institution-role-select"
    fallback_pattern = "getInstituteDarenData"

    # Positional arguments only, with the script name stripped off.
    cli_args = sys.argv[1:]
    target_url = cli_args[0] if len(cli_args) >= 1 else fallback_url
    api_pattern = cli_args[1] if len(cli_args) >= 2 else fallback_pattern

    capture_api_requests(target_url, api_pattern)
# Start the capture tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()