refactor: 改为bun运行时

This commit is contained in:
李岩岩 2026-04-07 15:43:13 +08:00
parent b3cf0efe1a
commit f45bdb6636
11 changed files with 216 additions and 274 deletions

4
.env
View File

@ -1,2 +1,2 @@
BASE_DIR=/app/houseDream BASE_DIR=/Users/liyanyan/study/house-data-collect
PORT=8080 PORT=8888

View File

@ -57,27 +57,38 @@
### 依赖安装 ### 依赖安装
```bash ```bash
cd /app/houseDream cd /app/houseDream
npm install bun install
``` ```
### 手动运行 ### 手动运行
```bash ```bash
# 启动 Web 服务器 # 启动 Web 服务器
npm run server bun run start
# 开发模式(文件变更自动重启)
bun run dev
# 执行完整爬取(截图+数据提取) # 执行完整爬取(截图+数据提取)
npm run daily bun run daily
# 仅截图 # 仅截图
npm run screenshot bun run screenshot
```
### Playwright 浏览器安装
首次安装依赖后请执行:
```bash
bunx playwright install chromium
``` ```
### PM2 管理(推荐) ### PM2 管理(推荐)
```bash ```bash
# 启动服务 # 启动服务
pm2 start server.js --name houseDream pm2 start "bun run start" --name houseDream
# 查看状态 # 查看状态
pm2 list pm2 list
@ -109,7 +120,7 @@ pm2 startup
## 技术栈 ## 技术栈
- **爬虫**: Playwright (Chromium) - **爬虫**: Playwright (Chromium)
- **后端**: Node.js + 原生 HTTP 模块 - **后端**: Bun + Bun.serve
- **前端**: HTML5 + CSS3 + Vanilla JavaScript - **前端**: HTML5 + CSS3 + Vanilla JavaScript
- **进程管理**: PM2 - **进程管理**: PM2
@ -117,13 +128,15 @@ pm2 startup
| 变量 | 默认值 | 说明 | | 变量 | 默认值 | 说明 |
|------|--------|------| |------|--------|------|
| `BASE_DIR` | 当前工作目录 | 项目根目录(用于定位 data/pic/web/public) |
| `HOST` | 127.0.0.1 | 服务器监听地址 |
| `PORT` | 8080 | 服务器监听端口 | | `PORT` | 8080 | 服务器监听端口 |
## 注意事项 ## 注意事项
1. 数据文件永久保留,不会自动清理 1. 数据文件永久保留,不会自动清理
2. 页面使用 Playwright 获取完整渲染后的内容 2. 页面使用 Playwright 获取完整渲染后的内容
3. 服务器默认绑定到 localhost如需外网访问请修改 server.js 中的监听地址 3. 默认监听地址为 127.0.0.1,可通过环境变量 HOST 修改
## 许可证 ## 许可证

19
bun.lock Normal file
View File

@ -0,0 +1,19 @@
{
"lockfileVersion": 1,
"configVersion": 0,
"workspaces": {
"": {
"name": "housedream",
"dependencies": {
"playwright": "^1.40.0",
},
},
},
"packages": {
"fsevents": ["fsevents@2.3.2", "http://mirrors.tencentyun.com/npm/fsevents/-/fsevents-2.3.2.tgz", { "os": "darwin" }, "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA=="],
"playwright": ["playwright@1.58.2", "http://mirrors.tencentyun.com/npm/playwright/-/playwright-1.58.2.tgz", { "dependencies": { "playwright-core": "1.58.2" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": "cli.js" }, "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A=="],
"playwright-core": ["playwright-core@1.58.2", "http://mirrors.tencentyun.com/npm/playwright-core/-/playwright-core-1.58.2.tgz", { "bin": "cli.js" }, "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg=="],
}
}

41
cron_daily.sh Executable file
View File

@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Daily collection job: runs the `daily` package script with Bun, stages
# everything under data/ and pic/, and commits + pushes when the index differs
# from HEAD.
#
# Every message carries a level tag; fatal errors are written to stderr so a
# scheduler or log redirection can tell failures apart from progress output.
#
# Exit status: 0 on success or when there is nothing to commit; non-zero when
# a prerequisite is missing or any step fails.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines.
set -euo pipefail

# Always operate from the directory that contains this script, regardless of
# the caller's working directory.
PROJECT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$PROJECT_DIR"

# Print a fatal error to stderr and stop the job with a non-zero status.
die() {
  echo "[ERROR] $*" >&2
  exit 1
}

command -v bun >/dev/null 2>&1 || die "bun 未安装或不在 PATH 中"
command -v git >/dev/null 2>&1 || die "git 未安装或不在 PATH 中"

# Ask git itself instead of testing for a .git directory: in linked worktrees
# and submodules .git is a regular file, which `-d .git` would reject.
if [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" != "true" ]]; then
  die "当前目录不是 Git 仓库: $PROJECT_DIR"
fi

echo "[INFO] 项目目录: $PROJECT_DIR"
echo "[INFO] 开始执行 daily 采集..."
bun run daily

echo "[INFO] 添加产物到暂存区..."
git add data pic

# `git diff --cached --quiet` exits 0 when the index matches HEAD, i.e. there
# is nothing staged to commit.
if git diff --cached --quiet; then
  echo "[INFO] 没有新增变更,跳过提交和推送"
  exit 0
fi

COMMIT_MSG="chore: daily data update $(date +%F)"
echo "[INFO] 提交变更: $COMMIT_MSG"
git commit -m "$COMMIT_MSG"

echo "[INFO] 推送到远端..."
git push

echo "[INFO] 执行完成"

60
package-lock.json generated
View File

@ -1,60 +0,0 @@
{
"name": "housedream",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "housedream",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"playwright": "^1.58.2"
}
},
"node_modules/fsevents": {
"version": "2.3.2",
"resolved": "http://mirrors.tencentyun.com/npm/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/playwright": {
"version": "1.58.2",
"resolved": "http://mirrors.tencentyun.com/npm/playwright/-/playwright-1.58.2.tgz",
"integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==",
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.58.2"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.58.2",
"resolved": "http://mirrors.tencentyun.com/npm/playwright-core/-/playwright-core-1.58.2.tgz",
"integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==",
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
}
}
}

View File

@ -3,10 +3,16 @@
"version": "1.0.0", "version": "1.0.0",
"description": "北京市房地产数据监控系统 - 自动爬取、提取、可视化展示", "description": "北京市房地产数据监控系统 - 自动爬取、提取、可视化展示",
"main": "server.js", "main": "server.js",
"packageManager": "bun@1.2.13",
"engines": {
"bun": ">=1.1.0"
},
"scripts": { "scripts": {
"server": "node server.js", "dev": "bun --watch server.js",
"screenshot": "node scripts/screenshot.js", "start": "bun server.js",
"daily": "node scripts/daily.js" "server": "bun run start",
"screenshot": "bun scripts/screenshot.js",
"daily": "bun scripts/daily.js"
}, },
"keywords": [ "keywords": [
"房地产", "房地产",
@ -18,7 +24,6 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"dotenv": "^17.3.1",
"playwright": "^1.40.0" "playwright": "^1.40.0"
} }
} }

52
pnpm-lock.yaml generated
View File

@ -1,52 +0,0 @@
lockfileVersion: '9.0'
settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
importers:
.:
dependencies:
dotenv:
specifier: ^17.3.1
version: 17.3.1
playwright:
specifier: ^1.40.0
version: 1.58.2
packages:
dotenv@17.3.1:
resolution: {integrity: sha512-IO8C/dzEb6O3F9/twg6ZLXz164a2fhTnEWb95H23Dm4OuN+92NmEAlTrupP9VW6Jm3sO26tQlqyvyi4CsnY9GA==}
engines: {node: '>=12'}
fsevents@2.3.2:
resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==}
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
os: [darwin]
playwright-core@1.58.2:
resolution: {integrity: sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==}
engines: {node: '>=18'}
hasBin: true
playwright@1.58.2:
resolution: {integrity: sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==}
engines: {node: '>=18'}
hasBin: true
snapshots:
dotenv@17.3.1: {}
fsevents@2.3.2:
optional: true
playwright-core@1.58.2: {}
playwright@1.58.2:
dependencies:
playwright-core: 1.58.2
optionalDependencies:
fsevents: 2.3.2

View File

@ -5,12 +5,10 @@
const { chromium } = require('playwright'); const { chromium } = require('playwright');
const fs = require('fs'); const fs = require('fs');
const fsp = require('fs/promises');
const path = require('path'); const path = require('path');
const dotenv = require('dotenv');
dotenv.config({path: ['.env.local', '.env'], quiet: true}); const BASE_DIR = path.resolve(process.env.BASE_DIR || process.cwd());
const BASE_DIR = process.env.BASE_DIR;
const PIC_DIR = path.join(BASE_DIR, 'pic'); const PIC_DIR = path.join(BASE_DIR, 'pic');
const DATA_DIR = path.join(BASE_DIR, 'data'); const DATA_DIR = path.join(BASE_DIR, 'data');
@ -77,9 +75,9 @@ async function main() {
console.log(` ✓ 截图已保存: ${picPath}`); console.log(` ✓ 截图已保存: ${picPath}`);
for (let p of ['define.js', 'extract.js']) { for (const p of ['define.js', 'extract.js']) {
const injectJsPath = path.join(BASE_DIR, 'public', p); const injectJsPath = path.join(BASE_DIR, 'public', p);
const injectJsContent = fs.readFileSync(injectJsPath, 'utf-8'); const injectJsContent = await fsp.readFile(injectJsPath, 'utf-8');
await page.addScriptTag({ content: injectJsContent }); await page.addScriptTag({ content: injectJsContent });
} }
console.log(' ✓ 数据提取脚本已注入'); console.log(' ✓ 数据提取脚本已注入');
@ -98,7 +96,7 @@ async function main() {
}, null, 2); }, null, 2);
// 保存原始内容 // 保存原始内容
fs.writeFileSync(dataPath, content, 'utf-8'); await Bun.write(dataPath, content);
console.log(`\n ✓ 数据已保存: ${dataPath}`); console.log(`\n ✓ 数据已保存: ${dataPath}`);
} catch (error) { } catch (error) {

View File

@ -5,11 +5,8 @@
const { chromium } = require('playwright'); const { chromium } = require('playwright');
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const dotenv = require('dotenv');
dotenv.config({path: ['.env.local', '.env'], quiet: true}); const BASE_DIR = path.resolve(process.env.BASE_DIR || process.cwd());
const BASE_DIR = process.env.BASE_DIR;
const PIC_DIR = path.join(BASE_DIR, 'pic'); const PIC_DIR = path.join(BASE_DIR, 'pic');
const TARGET_URL = 'http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749'; const TARGET_URL = 'http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749';

255
server.js
View File

@ -1,173 +1,154 @@
/** /**
* Web服务器脚本 * Bun Web服务器脚本
* 提供静态文件服务和API接口 * 提供静态文件服务和API接口
*/ */
const http = require('http');
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const url = require('url');
const dotenv = require('dotenv');
dotenv.config({path: ['.env.local', '.env'], quiet: true}); const BASE_DIR = path.resolve(process.env.BASE_DIR || process.cwd());
console.log('环境变量:', process.env.BASE_DIR);
const BASE_DIR = process.env.BASE_DIR;
const WEB_DIR = path.join(BASE_DIR, 'web'); const WEB_DIR = path.join(BASE_DIR, 'web');
const DATA_DIR = path.join(BASE_DIR, 'data'); const DATA_DIR = path.join(BASE_DIR, 'data');
const PIC_DIR = path.join(BASE_DIR, 'pic'); const PIC_DIR = path.join(BASE_DIR, 'pic');
const PUBLIC_DIR = path.join(BASE_DIR, 'public');
const PORT = process.env.PORT || 8080; const PORT = Number(process.env.PORT || 8080);
const HOST = process.env.HOST || '127.0.0.1';
// MIME类型映射 const CORS_HEADERS = {
const mimeTypes = { 'Access-Control-Allow-Origin': '*',
'.html': 'text/html', 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
'.css': 'text/css', 'Access-Control-Allow-Headers': 'Content-Type'
'.js': 'application/javascript',
'.json': 'application/json',
'.png': 'image/png',
'.jpg': 'image/jpeg',
'.gif': 'image/gif',
'.ico': 'image/x-icon'
}; };
// 获取MIME类型 function json(data, status = 200) {
function getMimeType(filePath) { return Response.json(data, {
const ext = path.extname(filePath).toLowerCase(); status,
return mimeTypes[ext] || 'application/octet-stream'; headers: CORS_HEADERS
}
// 读取文件
function readFile(filePath) {
return new Promise((resolve, reject) => {
fs.readFile(filePath, (err, data) => {
if (err) reject(err);
else resolve(data);
});
}); });
} }
// 列出可用日期 function text(body, status = 200) {
return new Response(body, {
status,
headers: CORS_HEADERS
});
}
function resolveSafePath(baseDir, requestPath) {
const relativePath = requestPath.replace(/^\/+/, '');
const resolvedPath = path.resolve(baseDir, relativePath);
if (resolvedPath === baseDir || resolvedPath.startsWith(`${baseDir}${path.sep}`)) {
return resolvedPath;
}
return null;
}
function listAvailableDates() { function listAvailableDates() {
try { try {
const files = fs.readdirSync(DATA_DIR); const files = fs.readdirSync(DATA_DIR);
return files return files
.filter(f => f.endsWith('.json') && !f.includes('_raw') && !f.includes('test')) .filter((file) => file.endsWith('.json') && !file.includes('_raw') && !file.includes('test'))
.map(f => f.replace('.json', '')) .map((file) => file.replace('.json', ''))
.sort() .sort()
.reverse(); .reverse();
} catch (err) { } catch {
return []; return [];
} }
} }
// 创建服务器 async function serveFile(filePath) {
const server = http.createServer(async (req, res) => { const file = Bun.file(filePath);
const parsedUrl = url.parse(req.url, true);
let pathname = parsedUrl.pathname; if (!(await file.exists())) {
return text('Not Found', 404);
// 设置CORS头
res.setHeader('Access-Control-Allow-Origin', '*');
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
if (req.method === 'OPTIONS') {
res.writeHead(200);
res.end();
return;
} }
try { return new Response(file, {
// API: 列出可用日期 status: 200,
if (pathname === '/api/dates') { headers: CORS_HEADERS
const dates = listAvailableDates(); });
res.writeHead(200, { 'Content-Type': 'application/json' }); }
res.end(JSON.stringify({ dates }));
return; const server = Bun.serve({
hostname: HOST,
port: PORT,
async fetch(req) {
if (req.method === 'OPTIONS') {
return new Response(null, {
status: 204,
headers: CORS_HEADERS
});
} }
// API: 获取指定日期数据 try {
if (pathname.startsWith('/api/data/')) { const { pathname } = new URL(req.url);
const date = pathname.replace('/api/data/', '');
const filePath = path.join(DATA_DIR, `${date}.json`); if (pathname === '/api/dates') {
return json({ dates: listAvailableDates() });
if (fs.existsSync(filePath)) {
const data = await readFile(filePath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(data);
} else {
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: '数据不存在' }));
} }
return;
} if (pathname.startsWith('/api/data/')) {
const date = pathname.replace('/api/data/', '');
// 静态文件服务 const filePath = resolveSafePath(DATA_DIR, `${date}.json`);
if (pathname === '/') {
pathname = '/index.html'; if (!filePath) {
} return json({ error: '非法路径' }, 403);
}
// 处理 data 和 pic 路径
let filePath; const file = Bun.file(filePath);
if (pathname.startsWith('/data/')) { if (!(await file.exists())) {
filePath = path.join(DATA_DIR, pathname.replace('/data/', '')); return json({ error: '数据不存在' }, 404);
} else if (pathname.startsWith('/pic/')) { }
filePath = path.join(PIC_DIR, pathname.replace('/pic/', ''));
} else if (pathname.startsWith('/public/')) { return new Response(file, {
filePath = path.join(BASE_DIR, pathname); status: 200,
} else { headers: {
filePath = path.join(WEB_DIR, pathname); ...CORS_HEADERS,
} 'Content-Type': 'application/json; charset=utf-8'
}
// 安全检查:防止目录遍历 });
if (!filePath.startsWith(BASE_DIR)) { }
res.writeHead(403, { 'Content-Type': 'text/plain' });
res.end('Forbidden'); let filePath;
return; if (pathname === '/') {
} filePath = path.join(WEB_DIR, 'index.html');
} else if (pathname.startsWith('/data/')) {
const data = await readFile(filePath); filePath = resolveSafePath(DATA_DIR, pathname.slice('/data/'.length));
const mimeType = getMimeType(filePath); } else if (pathname.startsWith('/pic/')) {
filePath = resolveSafePath(PIC_DIR, pathname.slice('/pic/'.length));
res.writeHead(200, { 'Content-Type': mimeType }); } else if (pathname.startsWith('/public/')) {
res.end(data); filePath = resolveSafePath(PUBLIC_DIR, pathname.slice('/public/'.length));
} else {
} catch (err) { filePath = resolveSafePath(WEB_DIR, pathname);
console.error('文件读取错误:', err); }
if (err.code === 'ENOENT') {
res.writeHead(404, { 'Content-Type': 'text/plain' }); if (!filePath) {
res.end('Not Found'); return text('Forbidden', 403);
} else { }
console.error('服务器错误:', err);
res.writeHead(500, { 'Content-Type': 'text/plain' }); return serveFile(filePath);
res.end('Internal Server Error'); } catch (error) {
console.error('服务器错误:', error);
return text('Internal Server Error', 500);
} }
} }
}); });
// 启动服务器 console.log('==========================================');
server.listen(PORT, 'localhost', () => { console.log('北京市房地产数据监控服务器已启动');
console.log('=========================================='); console.log(`访问地址: http://${HOST}:${PORT}`);
console.log('北京市房地产数据监控服务器已启动'); console.log('按 Ctrl+C 停止服务器');
console.log(`访问地址: http://localhost:${PORT}`); console.log('==========================================');
console.log('按 Ctrl+C 停止服务器');
console.log('==========================================');
});
// 优雅关闭 function shutdown(signal) {
process.on('SIGTERM', () => { console.log(`\n${signal}\n正在关闭服务器...`);
console.log('\nSIGTERM\n正在关闭服务器...'); server.stop(true);
server.close(() => { console.log('服务器已关闭');
console.log('服务器已关闭'); process.exit(0);
process.exit(0); }
});
});
process.on('SIGINT', () => { process.on('SIGTERM', () => shutdown('SIGTERM'));
console.log('\nSIGINT\n正在关闭服务器...'); process.on('SIGINT', () => shutdown('SIGINT'));
server.close(() => {
console.log('服务器已关闭');
process.exit(0);
});
});

View File

@ -2,4 +2,4 @@
# 启动 houseDream # 启动 houseDream
# cd /app/houseDream # cd /app/houseDream
pnpm run server bun run start