/**
 * Daily scrape entry script.
 *
 * Takes a full-page screenshot of the target page, injects the extraction
 * scripts from `<BASE_DIR>/public`, runs `extractData()` inside the page and
 * stores the result as JSON.
 */
const { chromium } = require('playwright');
const fs = require('fs');
const fsp = require('fs/promises');
const path = require('path');

// Output root: BASE_DIR env var when set, otherwise the current working directory.
const BASE_DIR = path.resolve(process.env.BASE_DIR || process.cwd());
const PIC_DIR = path.join(BASE_DIR, 'pic');
const DATA_DIR = path.join(BASE_DIR, 'data');
const TARGET_URL = 'http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749';

// Scripts injected into the page, in this order, before calling extractData().
const INJECTED_SCRIPTS = ['define.js', 'extract.js'];

/**
 * Formats a date as `YYYY-MM-DD` using its local-time components.
 *
 * @param {Date} date - Date to format.
 * @returns {string} Zero-padded `YYYY-MM-DD` string.
 */
function formatDate(date) {
  const year = date.getFullYear();
  const month = String(date.getMonth() + 1).padStart(2, '0');
  const day = String(date.getDate()).padStart(2, '0');
  return `${year}-${month}-${day}`;
}

/**
 * Returns the date the scraped data is filed under.
 *
 * Convention: a run performed today stores the previous day's data.
 *
 * @returns {string} Yesterday's local date as `YYYY-MM-DD`.
 */
function getDataDate() {
  const date = new Date();
  date.setDate(date.getDate() - 1);
  return formatDate(date);
}

/**
 * Runs one scrape: screenshot, script injection, data extraction, JSON write.
 *
 * On failure the error message is logged and `process.exitCode` is set to 1;
 * the browser is always closed before the function settles.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const dataDate = getDataDate();
  console.log('==========================================');
  console.log(`开始爬取(落地日期): ${dataDate}`);
  console.log(`目标URL: ${TARGET_URL}`);
  console.log('==========================================\n');

  const picPath = path.join(PIC_DIR, `${dataDate}.png`);
  const dataPath = path.join(DATA_DIR, `${dataDate}.json`);

  // Make sure the output directories exist. A recursive mkdir is a no-op for
  // an existing directory, so no separate existence check is needed.
  for (const dir of [PIC_DIR, DATA_DIR]) {
    fs.mkdirSync(dir, { recursive: true });
  }

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
    });
    const page = await context.newPage();

    // Timeouts for page actions and navigations.
    page.setDefaultTimeout(60000);
    page.setDefaultNavigationTimeout(60000);

    // Open the page and wait for it to finish loading.
    await page.goto(TARGET_URL, {
      waitUntil: 'networkidle',
      timeout: 60000,
    });
    await page.waitForLoadState('domcontentloaded');

    // Extra 3 second pause so dynamic content can finish loading.
    await page.waitForTimeout(3000);

    // Set every <table> to display: table before taking the screenshot.
    await page.evaluate(() => {
      const tables = document.querySelectorAll('table');
      tables.forEach((table) => {
        table.style.display = 'table';
      });
    });

    // Screenshot of the full page.
    await page.screenshot({
      path: picPath,
      fullPage: true,
    });
    console.log(` ✓ 截图已保存: ${picPath}`);

    // Inject the extraction scripts sequentially; order is preserved.
    for (const p of INJECTED_SCRIPTS) {
      const injectJsPath = path.join(BASE_DIR, 'public', p);
      const injectJsContent = await fsp.readFile(injectJsPath, 'utf-8');
      await page.addScriptTag({ content: injectJsContent });
    }
    console.log(' ✓ 数据提取脚本已注入');

    // extractData() is expected to be defined by the injected scripts; its
    // result is serialized inside the page and parsed back here.
    const data = await page.evaluate(() => {
      return JSON.stringify(extractData());
    });
    console.log(' ✓ 数据已提取');

    const content = JSON.stringify(
      {
        date: dataDate,
        timestamp: Date.now(),
        source: TARGET_URL,
        data: JSON.parse(data),
      },
      null,
      2
    );

    // Persist with fs/promises (already imported) instead of the Bun-only
    // global, so the script also runs under plain Node.
    await fsp.writeFile(dataPath, content);
    console.log(`\n ✓ 数据已保存: ${dataPath}`);
  } catch (error) {
    console.error(` ✗ 操作失败: ${error.message}`);
    // Set the exit code instead of calling process.exit(): exiting here would
    // terminate the process before the finally block closes the browser.
    process.exitCode = 1;
    return;
  } finally {
    await browser.close();
  }

  console.log('\n==========================================');
  console.log(`爬取完成(落地日期): ${dataDate}`);
  console.log(`截图文件: ${picPath}`);
  console.log(`数据文件: ${dataPath}`);
  console.log('==========================================');
}

main().catch((err) => {
  console.error('执行失败:', err);
  process.exit(1);
});