commit bd8aed9f4fe094ce6b082a100ad4ec31146fc0d0 Author: root Date: Fri Mar 20 21:53:19 2026 +0800 inti commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08d678d --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +# Dependencies +node_modules/ + +# Logs +logs/ +*.log +cron.log + +# Runtime data +pids/ +*.pid +*.seed +*.pid.lock + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env + +# OS files +.DS_Store +Thumbs.db + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# Data and pics (optional - uncomment if you don't want to track them) +# data/ +# pic/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..41df1cc --- /dev/null +++ b/README.md @@ -0,0 +1,130 @@ +# HouseDream - 北京市房地产数据监控系统 + +自动爬取、提取并可视化展示北京市住房和城乡建设委员会的房地产交易数据。 + +## 功能特性 + +- **自动爬取**: 使用 Playwright 定时抓取官方页面数据 +- **数据提取**: 结构化提取商品房和存量房交易统计 +- **截图存档**: 每日完整页面截图保存 +- **可视化展示**: Web 界面展示历史数据,支持日期切换 +- **API 接口**: 提供 RESTful API 供外部调用 + +## 数据模块 + +### 商品房数据统计 +- 可售期房统计 +- 预售许可(上月汇总) +- 期房网上认购(当日) +- 期房网上签约(当日) +- 未签约现房统计 +- 现房项目情况 +- 现房网上认购(当日) +- 现房网上签约(当日) + +### 存量房网上签约统计 +- 月签约统计 +- 日签约统计 + +### 存量房维度统计 +- 按经纪机构统计(Top 10) +- 按所在区县统计 +- 按建筑面积统计 + +## 项目结构 + +``` +/app/houseDream/ +├── data/ # JSON 数据文件 +├── pic/ # 每日页面截图 +├── web/ # 前端界面 +│ ├── index.html # 主页面 +│ ├── style.css # 样式 +│ ├── app.js # 前端逻辑 +│ └── define.js # 数据定义 +├── scripts/ # 后端脚本 +│ ├── daily.js # 完整爬取流程 +│ └── screenshot.js # 仅截图 +├── public/ # 公共资源 +│ └── extract.js # 数据提取逻辑 +├── server.js # Web 服务器 +├── package.json # 项目配置 +└── README.md # 本文件 +``` + +## 安装与运行 + +### 依赖安装 +```bash +cd /app/houseDream +npm install +``` + +### 手动运行 + +```bash +# 启动 Web 服务器 +npm run server + +# 执行完整爬取(截图+数据提取) +npm run daily + +# 仅截图 +npm run screenshot +``` + +### PM2 管理(推荐) + +```bash +# 启动服务 +pm2 start server.js --name houseDream + +# 查看状态 +pm2 list +pm2 logs 
houseDream + +# 重启/停止 +pm2 restart houseDream +pm2 stop houseDream + +# 设置开机自启 +pm2 save +pm2 startup +``` + +## API 接口 + +| 接口 | 方法 | 说明 | +|------|------|------| +| `/` | GET | 可视化界面 | +| `/api/dates` | GET | 获取可用日期列表 | +| `/api/data/YYYY-MM-DD` | GET | 获取指定日期数据 | +| `/pic/YYYY-MM-DD.png` | GET | 获取指定日期截图 | + +## 数据来源 + +- **官方网址**: http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749 +- **来源机构**: 北京市住房和城乡建设委员会 + +## 技术栈 + +- **爬虫**: Playwright (Chromium) +- **后端**: Node.js + 原生 HTTP 模块 +- **前端**: HTML5 + CSS3 + Vanilla JavaScript +- **进程管理**: PM2 + +## 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `PORT` | 8080 | 服务器监听端口 | + +## 注意事项 + +1. 数据文件永久保留,不会自动清理 +2. 页面使用 Playwright 获取完整渲染后的内容 +3. 服务器默认绑定到 localhost,如需外网访问请修改 server.js 中的监听地址 + +## 许可证 + +ISC diff --git a/data/2026-03-19.json b/data/2026-03-19.json new file mode 100644 index 0000000..aecaffe --- /dev/null +++ b/data/2026-03-19.json @@ -0,0 +1,159 @@ +{ + "date": "2026-03-19", + "timestamp": "2026-03-19T10:00:19.723Z", + "source": "http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749", + "data": { + "spfsjtj": { + "kspf": { + "kspf_total_units": "88182", + "kspf_total_area": "7434268.3100", + "kspf_residential_units": "38223", + "kspf_residential_area": "2020", + "kspf_commercial_units": "214", + "kspf_office_units": "533", + "kspf_parking_units": "39765" + }, + "ysxk": { + "ysxk_license_count": "8", + "ysxk_total_area": "314659.9000", + "ysxk_residential_units": "38223", + "ysxk_residential_area": "2020", + "ysxk_commercial_units": "214", + "ysxk_office_units": "533", + "ysxk_parking_units": "39765" + }, + "qfrg": { + "qfrg_total_units": "82", + "qfrg_total_area": "9521.5700", + "qfrg_residential_units": "38223", + "qfrg_residential_area": "2020", + "qfrg_commercial_units": "214", + "qfrg_office_units": "533", + "qfrg_parking_units": "39765" + }, + "qfqy": { + "qfqy_total_units": "73", + "qfqy_total_area": "7695.9100", + "qfqy_residential_units": "38223", + "qfqy_residential_area": 
"2020", + "qfqy_commercial_units": "214", + "qfqy_office_units": "533", + "qfqy_parking_units": "39765" + }, + "wyxf": { + "wyxf_total_units": "213714", + "wyxf_total_area": "11396131.1500", + "wyxf_residential_units": "38223", + "wyxf_residential_area": "2020", + "wyxf_commercial_units": "214", + "wyxf_office_units": "533", + "wyxf_parking_units": "39765" + }, + "xfxm": { + "xfxm_project_count": "39891", + "xfxm_residential_units": "38223", + "xfxm_residential_area": "2020", + "xfxm_commercial_units": "214", + "xfxm_office_units": "533", + "xfxm_parking_units": "39765", + "xfxm_parking_area": "279369787.5600" + }, + "xfrg": { + "xfrg_total_units": "82", + "xfrg_residential_units": "38223", + "xfrg_residential_area": "2020", + "xfrg_commercial_units": "214", + "xfrg_office_units": "533", + "xfrg_parking_units": "39765", + "xfrg_parking_area": "9521.5700" + }, + "xfqy": { + "xfqy_total_units": "73", + "xfqy_residential_units": "38223", + "xfqy_residential_area": "2020", + "xfqy_commercial_units": "214", + "xfqy_office_units": "533", + "xfqy_parking_units": "39765", + "xfqy_parking_area": "7695.9100" + } + }, + "clfwsqytj": { + "clf_month": { + "clf_month_total_units": "73", + "clf_month_total_area": "7695.9100", + "clf_month_residential_units": "8130", + "clf_month_residential_area": "721746.3800" + }, + "clf_day": { + "clf_day_total_units": "73", + "clf_day_total_area": "7695.9100", + "clf_day_residential_units": "8130", + "clf_day_residential_area": "721746.3800" + } + }, + "clfwdtj": { + "broker": [ + { + "broker_seq": 1, + "broker_name": "北京链家置地房地产经纪有限公司", + "broker_deal_units": 3968, + "broker_refund_units": 67 + }, + { + "broker_seq": 2, + "broker_name": "北京我爱我家房地产经纪有限公司", + "broker_deal_units": 860, + "broker_refund_units": 27 + }, + { + "broker_seq": 3, + "broker_name": "北京我爱我家华熙房地产经纪有限公司", + "broker_deal_units": 191, + "broker_refund_units": 4 + }, + { + "broker_seq": 4, + "broker_name": "北京金色时光房地产经纪有限公司", + "broker_deal_units": 183, + 
"broker_refund_units": 7 + }, + { + "broker_seq": 5, + "broker_name": "北京麦田房产经纪有限公司", + "broker_deal_units": 167, + "broker_refund_units": 2 + }, + { + "broker_seq": 6, + "broker_name": "北京市易合房地产经纪有限责任公司", + "broker_deal_units": 54, + "broker_refund_units": 0 + }, + { + "broker_seq": 7, + "broker_name": "汇石凯岩(北京)停车管理有限公司", + "broker_deal_units": 45, + "broker_refund_units": 0 + }, + { + "broker_seq": 8, + "broker_name": "北京金城阜业房地产经纪有限公司", + "broker_deal_units": 40, + "broker_refund_units": 1 + }, + { + "broker_seq": 9, + "broker_name": "北京市兴商房地产经纪中心有限公司", + "broker_deal_units": 36, + "broker_refund_units": 1 + }, + { + "broker_seq": 10, + "broker_name": "京安驰(北京)房地产经纪有限公司", + "broker_deal_units": 30, + "broker_refund_units": 3 + } + ] + } + } +} \ No newline at end of file diff --git a/data/2026-03-20.json b/data/2026-03-20.json new file mode 100644 index 0000000..dc3f127 --- /dev/null +++ b/data/2026-03-20.json @@ -0,0 +1,307 @@ +{ + "date": "2026-03-20", + "timestamp": 1773998194623, + "source": "http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749", + "data": { + "spfsjtj": { + "ksqf": { + "kspf_total_units": "88034", + "kspf_total_area": "7423053.0600", + "kspf_residential_units": "38160", + "kspf_residential_area": "5321452.7700", + "kspf_commercial_units": "214", + "kspf_commercial_area": "159917.8800", + "kspf_office_units": "533", + "kspf_office_area": "517721.2300", + "kspf_parking_units": "39683", + "kspf_parking_area": "1111569.5900" + }, + "ysxk": { + "ysxk_license_count": "8", + "ysxk_total_area": "314659.9000", + "ysxk_residential_units": "2020", + "ysxk_residential_area": "264160.8100", + "ysxk_commercial_units": "0", + "ysxk_commercial_area": "0.0000", + "ysxk_office_units": "0", + "ysxk_office_area": "0.0000", + "ysxk_parking_units": "1384", + "ysxk_parking_area": "38546.2200" + }, + "qfrg": { + "qfrg_total_units": "85", + "qfrg_total_area": "10122.0000", + "qfrg_residential_units": "83", + "qfrg_residential_area": "10055.8700", + 
"qfrg_commercial_units": "0", + "qfrg_commercial_area": "0.0000", + "qfrg_office_units": "0", + "qfrg_office_area": "0.0000", + "qfrg_parking_units": "1", + "qfrg_parking_area": "33.8500" + }, + "qfqy": { + "qfqy_total_units": "159", + "qfqy_total_area": "12186.3400", + "qfqy_residential_units": "70", + "qfqy_residential_area": "8725.2800", + "qfqy_commercial_units": "0", + "qfqy_commercial_area": "0.0000", + "qfqy_office_units": "0", + "qfqy_office_area": "0.0000", + "qfqy_parking_units": "86", + "qfqy_parking_area": "3340.8800" + }, + "wyxf": { + "wyxf_total_units": "214061", + "wyxf_total_area": "11463428.8300", + "wyxf_residential_units": "30104", + "wyxf_residential_area": "3434275.6000", + "wyxf_commercial_units": "1739", + "wyxf_commercial_area": "798046.9800", + "wyxf_office_units": "4427", + "wyxf_office_area": "1544716.4200", + "wyxf_parking_units": "124541", + "wyxf_parking_area": "4159255.5300" + }, + "xfxm": { + "xfxm_project_count": "39902", + "xfxm_total_area": "279449030.4600", + "xfxm_residential_units": "937508", + "xfxm_residential_area": "116806207.2800", + "xfxm_commercial_units": "96927", + "xfxm_commercial_area": "23085347.5300", + "xfxm_office_units": "134740", + "xfxm_office_area": "25167845.3100", + "xfxm_parking_units": "826833", + "xfxm_parking_area": "31551000.9200" + }, + "xfrg": { + "xfrg_total_units": "38", + "xfrg_total_area": "4251.7000", + "xfrg_residential_units": "34", + "xfrg_residential_area": "4140.6000", + "xfrg_commercial_units": "0", + "xfrg_commercial_area": "0.0000", + "xfrg_office_units": "0", + "xfrg_office_area": "0.0000", + "xfrg_parking_units": "2", + "xfrg_parking_area": "70.7200" + }, + "xfqy": { + "xfqy_total_units": "100", + "xfqy_total_area": "6356.1100", + "xfqy_residential_units": "30", + "xfqy_residential_area": "3942.5900", + "xfqy_commercial_units": "1", + "xfqy_commercial_area": "280.5500", + "xfqy_office_units": "0", + "xfqy_office_area": "0.0000", + "xfqy_parking_units": "44", + "xfqy_parking_area": 
"1377.8100" + } + }, + "clfwsqytj": { + "clf_month": { + "clf_month_total_units": "9111", + "clf_month_total_area": "772959.8600", + "clf_month_residential_units": "8130", + "clf_month_residential_area": "721746.3800" + }, + "clf_day": { + "clf_day_total_units": "1014", + "clf_day_total_area": "87806.6200", + "clf_day_residential_units": "916", + "clf_day_residential_area": "79638.2500" + } + }, + "clfwdtj": { + "broker": [ + { + "broker_seq": "1", + "broker_name": "北京链家置地房地产经纪有限公司", + "broker_deal_units": "3968", + "broker_refund_units": "67" + }, + { + "broker_seq": "2", + "broker_name": "北京我爱我家房地产经纪有限公司", + "broker_deal_units": "860", + "broker_refund_units": "27" + }, + { + "broker_seq": "3", + "broker_name": "北京我爱我家华熙房地产经纪有限公司", + "broker_deal_units": "191", + "broker_refund_units": "4" + }, + { + "broker_seq": "4", + "broker_name": "北京金色时光房地产经纪有限公司", + "broker_deal_units": "183", + "broker_refund_units": "7" + }, + { + "broker_seq": "5", + "broker_name": "北京麦田房产经纪有限公司", + "broker_deal_units": "167", + "broker_refund_units": "2" + }, + { + "broker_seq": "6", + "broker_name": "北京市易合房地产经纪有限责任公司", + "broker_deal_units": "54", + "broker_refund_units": "0" + }, + { + "broker_seq": "7", + "broker_name": "汇石凯岩(北京)停车管理有限公司", + "broker_deal_units": "45", + "broker_refund_units": "0" + }, + { + "broker_seq": "8", + "broker_name": "北京金城阜业房地产经纪有限公司", + "broker_deal_units": "40", + "broker_refund_units": "1" + }, + { + "broker_seq": "9", + "broker_name": "北京市兴商房地产经纪中心有限公司", + "broker_deal_units": "36", + "broker_refund_units": "1" + }, + { + "broker_seq": "10", + "broker_name": "京安驰(北京)房地产经纪有限公司", + "broker_deal_units": "30", + "broker_refund_units": "3" + } + ], + "district": [ + { + "district_name": "全 市", + "district_deal_units": "9111.0", + "district_deal_area": "772959.86" + }, + { + "district_name": "东 城", + "district_deal_units": "368", + "district_deal_area": "26395.3600" + }, + { + "district_name": "西 城", + "district_deal_units": "526", + "district_deal_area": 
"33057.9300" + }, + { + "district_name": "朝 阳", + "district_deal_units": "2150", + "district_deal_area": "190029.8400" + }, + { + "district_name": "海 淀", + "district_deal_units": "1026", + "district_deal_area": "81386.1200" + }, + { + "district_name": "丰 台", + "district_deal_units": "951", + "district_deal_area": "76383.6700" + }, + { + "district_name": "石景山", + "district_deal_units": "276", + "district_deal_area": "20243.8900" + }, + { + "district_name": "通 州", + "district_deal_units": "651", + "district_deal_area": "57381.0200" + }, + { + "district_name": "房 山", + "district_deal_units": "477", + "district_deal_area": "39740.7200" + }, + { + "district_name": "顺 义", + "district_deal_units": "477", + "district_deal_area": "48649.5400" + }, + { + "district_name": "门头沟", + "district_deal_units": "295", + "district_deal_area": "21153.3900" + }, + { + "district_name": "大 兴", + "district_deal_units": "590", + "district_deal_area": "51463.3600" + }, + { + "district_name": "怀 柔", + "district_deal_units": "107", + "district_deal_area": "10798.1600" + }, + { + "district_name": "密 云", + "district_deal_units": "196", + "district_deal_area": "19915.1000" + }, + { + "district_name": "昌 平", + "district_deal_units": "712", + "district_deal_area": "67716.5300" + }, + { + "district_name": "延 庆", + "district_deal_units": "102", + "district_deal_area": "8821.4400" + }, + { + "district_name": "平 谷", + "district_deal_units": "108", + "district_deal_area": "10040.0800" + }, + { + "district_name": "开发区", + "district_deal_units": "99", + "district_deal_area": "9783.7100" + } + ], + "area": [ + { + "area_range": "60m2以下", + "area_deal_units": "2828", + "area_deal_percent": "127669.4500" + }, + { + "area_range": "60~80m2", + "area_deal_units": "2200", + "area_deal_percent": "152566.2600" + }, + { + "area_range": "80~100m2", + "area_deal_units": "1999", + "area_deal_percent": "177844.6000" + }, + { + "area_range": "100~120m2", + "area_deal_units": "791", + "area_deal_percent": "86075.4300" + 
}, + { + "area_range": "120~140m2", + "area_deal_units": "550", + "area_deal_percent": "71435.1700" + }, + { + "area_range": "140m2以上", + "area_deal_units": "743", + "area_deal_percent": "157368.9500" + } + ] + } + } +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..37ce143 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,60 @@ +{ + "name": "housedream", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "housedream", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "playwright": "^1.58.2" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "http://mirrors.tencentyun.com/npm/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.58.2", + "resolved": "http://mirrors.tencentyun.com/npm/playwright/-/playwright-1.58.2.tgz", + "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==", + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.58.2", + "resolved": "http://mirrors.tencentyun.com/npm/playwright-core/-/playwright-core-1.58.2.tgz", + "integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==", + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..3433786 --- 
/dev/null +++ b/package.json @@ -0,0 +1,17 @@ +{ + "name": "housedream", + "version": "1.0.0", + "description": "北京市房地产数据监控系统 - 自动爬取、提取、可视化展示", + "main": "server.js", + "scripts": { + "server": "node ./server.js", + "screenshot": "node scripts/screenshot.js", + "daily": "node scripts/daily.js" + }, + "keywords": ["房地产", "数据监控", "北京", "爬虫", "可视化"], + "author": "", + "license": "ISC", + "dependencies": { + "playwright": "^1.40.0" + } +} diff --git a/pic/2026-03-19.png b/pic/2026-03-19.png new file mode 100644 index 0000000..3f57883 Binary files /dev/null and b/pic/2026-03-19.png differ diff --git a/pic/2026-03-20.png b/pic/2026-03-20.png new file mode 100644 index 0000000..b8e25d8 Binary files /dev/null and b/pic/2026-03-20.png differ diff --git a/public/extract.js b/public/extract.js new file mode 100644 index 0000000..826f5f9 --- /dev/null +++ b/public/extract.js @@ -0,0 +1,91 @@ +const getSpfsjtjData = () => { + const res = {}; + const root = document.querySelectorAll('.portlet')[1]; + Object.keys(rootDefined.spfsjtj.childMap).forEach((moduleKey, index) => { + res[moduleKey] = {}; + const moduleDom = root.querySelectorAll('td table')[index]; + const moduleFiledMap = rootDefined.spfsjtj.childMap[moduleKey].childMap; + Object.keys(moduleFiledMap).forEach((fieldKey, fieldIndex) => { + const fieldDom = moduleDom.querySelectorAll('tr')[fieldIndex + 1].querySelectorAll('td')[1]; + res[moduleKey][fieldKey] = fieldDom.innerText.trim(); + }); + }); + return res; +} + +const getClfwsqytjData = () => { + const res = {}; + const root = document.querySelectorAll('.portlet')[2]; + Object.keys(rootDefined.clfwsqytj.childMap).forEach((moduleKey, index) => { + res[moduleKey] = {}; + const moduleDom = root.querySelectorAll('td table')[index]; + const moduleFiledMap = rootDefined.clfwsqytj.childMap[moduleKey].childMap; + Object.keys(moduleFiledMap).forEach((fieldKey, fieldIndex) => { + const fieldDom = moduleDom.querySelectorAll('tr')[fieldIndex + 1].querySelectorAll('td')[1]; + 
res[moduleKey][fieldKey] = fieldDom.innerText.replaceAll(' ', ''); + }); + }); + return res; +} + +const getClfwdtjData = () => { + const res = {}; + const root = document.querySelectorAll('.portlet')[3]; + Object.keys(rootDefined.clfwdtj.childMap).forEach((moduleKey) => { + const tableDomList = root.querySelectorAll('td table'); + const moduleFiledMap = rootDefined.clfwdtj.childMap[moduleKey].childMap; + const dataList = []; + if (moduleKey === 'broker') { + // 纵向表格,数据在第2、3个table中 + const trList = [...tableDomList[1].querySelectorAll('tr')].slice(1).concat([...tableDomList[2].querySelectorAll('tr')].slice(1)); + trList.forEach(trDom => { + const dataItem = {}; + Object.keys(moduleFiledMap).forEach((fieldKey, fieldIndex) => { + const fieldDom = trDom.querySelectorAll('td')[fieldIndex]; + dataItem[fieldKey] = fieldDom.innerText.trim(); + }); + dataList.push(dataItem); + }); + } else if (moduleKey === 'area') { + // 横向表格,数据在第5个table中 + const trList = [...tableDomList[4].querySelectorAll('tr')]; + Object.keys(moduleFiledMap).forEach((fieldKey, fieldIndex) => { + const tdList = [...trList[fieldIndex].querySelectorAll('td')].slice(1); + tdList.forEach((tdDom, tdIndex) => { + if (!dataList[tdIndex]) { + dataList[tdIndex] = {}; + } + dataList[tdIndex][fieldKey] = tdDom.innerText.trim(); + }); + }); + } else if (moduleKey === 'district') { + // 横向表格,数据在第4个table中,叠加在一起,每组数组占据3行,例如,总共9行数据,则1-3为第一组,4-6为第二组,7-9为第三组,三组加起来为一个完整的数据项 + const trList = [...tableDomList[3].querySelectorAll('tr')]; + const groupSize = Object.keys(moduleFiledMap).length; // 每组数据占据的行数 + for (let trIndex = 0; trIndex < trList.length; trIndex++) { + const fieldIndex = trIndex % groupSize; // 当前行对应的字段索引 + const tdList = [...trList[trIndex].querySelectorAll('td')].slice(1); + const startIndex = Math.floor(trIndex / groupSize) * tdList.length; // 当前组的起始行索引 + tdList.forEach((tdDom, tdIndex) => { + if (!dataList[startIndex + tdIndex]) { + dataList[startIndex + tdIndex] = {}; + } + const fieldKey = 
Object.keys(moduleFiledMap)[fieldIndex]; + dataList[startIndex + tdIndex][fieldKey] = tdDom.innerText.trim(); + }); + + } + } + res[moduleKey] = dataList; + }); + return res; +} + + +const extractData = () => { + return { + spfsjtj: getSpfsjtjData(), + clfwsqytj: getClfwsqytjData(), + clfwdtj: getClfwdtjData(), + } +} \ No newline at end of file diff --git a/scripts/daily.js b/scripts/daily.js new file mode 100644 index 0000000..54fd43f --- /dev/null +++ b/scripts/daily.js @@ -0,0 +1,121 @@ +/** + * 每日爬取主脚本 + * 执行截图 + 数据获取 + 数据提取 + */ +const { chromium } = require('playwright'); + +const fs = require('fs'); +const path = require('path'); + +const extractFunc = require('../utils/extract_data'); + +const BASE_DIR = '/app/houseDream'; +// const BASE_DIR = '/Users/liyanyan/vps/tencet-ecs/app/houseDream'; +const PIC_DIR = path.join(BASE_DIR, 'pic'); +const DATA_DIR = path.join(BASE_DIR, 'data'); + +const TARGET_URL = 'http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749'; + +// 获取当前日期 +function getToday() { + return new Date().toISOString().split('T')[0]; +} + +// 主函数 +async function main() { + const today = getToday(); + + console.log('=========================================='); + console.log(`开始爬取: ${today}`); + console.log(`目标URL: ${TARGET_URL}`); + console.log('==========================================\n'); + + const picPath = path.join(PIC_DIR, `${today}.png`); + const dataPath = path.join(DATA_DIR, `${today}.json`); + + // 确保输出目录存在 + if (!fs.existsSync(PIC_DIR)) { + fs.mkdirSync(PIC_DIR, { recursive: true }); + } + if (!fs.existsSync(DATA_DIR)) { + fs.mkdirSync(DATA_DIR, { recursive: true }); + } + + const browser = await chromium.launch({ + headless: true, + args: ['--no-sandbox', '--disable-setuid-sandbox'] + }); + + try { + const context = await browser.newContext({ + viewport: { width: 1920, height: 1080 } + }); + + const page = await context.newPage(); + + // 设置超时 + page.setDefaultTimeout(60000); + page.setDefaultNavigationTimeout(60000); + + // 访问页面 + 
await page.goto(TARGET_URL, { + waitUntil: 'networkidle', + timeout: 60000 + }); + + // 等待页面加载完成 + await page.waitForLoadState('domcontentloaded'); + + // 额外等待3秒确保动态内容加载 + await page.waitForTimeout(3000); + + // 截图 + await page.screenshot({ + path: picPath, + fullPage: true + }); + + console.log(` ✓ 截图已保存: ${picPath}`); + + for (let p of ['define.js', 'extract.js']) { + const injectJsPath = path.join(BASE_DIR, 'public', p); + const injectJsContent = fs.readFileSync(injectJsPath, 'utf-8'); + await page.addScriptTag({ content: injectJsContent }); + } + console.log(' ✓ 数据提取脚本已注入'); + + // 获取页面文本内容 + const data = await page.evaluate(() => { + return JSON.stringify(extractData()); + }); + console.log(' ✓ 数据已提取'); + + const content = JSON.stringify({ + date: today, + timestamp: Date.now(), + source: TARGET_URL, + data: JSON.parse(data) + }, null, 2); + + // 保存原始内容 + fs.writeFileSync(dataPath, content, 'utf-8'); + console.log(`\n ✓ 数据已保存: ${dataPath}`); + + } catch (error) { + console.error(` ✗ 操作失败: ${error.message}`); + process.exit(1); + } finally { + await browser.close(); + } + + console.log('\n=========================================='); + console.log(`爬取完成: ${today}`); + console.log(`截图文件: ${picPath}`); + console.log(`数据文件: ${dataPath}`); + console.log('=========================================='); +} + +main().catch(err => { + console.error('执行失败:', err); + process.exit(1); +}); diff --git a/scripts/screenshot.js b/scripts/screenshot.js new file mode 100644 index 0000000..3720439 --- /dev/null +++ b/scripts/screenshot.js @@ -0,0 +1,75 @@ +/** + * 截图脚本 - 使用 Playwright + */ + +const { chromium } = require('playwright'); +const fs = require('fs'); +const path = require('path'); + +const BASE_DIR = '/app/houseDream'; +// const BASE_DIR = '/Users/liyanyan/vps/tencet-ecs/app/houseDream'; +const PIC_DIR = path.join(BASE_DIR, 'pic'); + +const TARGET_URL = 'http://bjjs.zjw.beijing.gov.cn/eportal/ui?pageId=307749'; + +// 获取当前日期 +function getToday() { + return new 
Date().toISOString().split('T')[0]; +} + +async function screenshot(url, outputPath) { + console.log(`正在截图: ${url}`); + console.log(`输出路径: ${outputPath}`); + + const browser = await chromium.launch({ + headless: true, + args: ['--no-sandbox', '--disable-setuid-sandbox'] + }); + + try { + const context = await browser.newContext({ + viewport: { width: 1920, height: 1080 } + }); + + const page = await context.newPage(); + + // 设置超时 + page.setDefaultTimeout(60000); + page.setDefaultNavigationTimeout(60000); + + // 访问页面 + await page.goto(url, { + waitUntil: 'networkidle', + timeout: 60000 + }); + + // 等待页面加载完成 + await page.waitForLoadState('domcontentloaded'); + + // 额外等待3秒确保动态内容加载 + await page.waitForTimeout(3000); + + // 确保输出目录存在 + const outputDir = path.dirname(outputPath); + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + // 截图 + await page.screenshot({ + path: outputPath, + fullPage: true + }); + + console.log(` ✓ 截图已保存: ${outputPath}`); + + } catch (error) { + console.error(` ✗ 截图失败: ${error.message}`); + process.exit(1); + } finally { + await browser.close(); + } +} + +// 主程序 +screenshot(TARGET_URL, path.join(PIC_DIR, `${getToday()}.png`)); diff --git a/server.js b/server.js new file mode 100644 index 0000000..5696b54 --- /dev/null +++ b/server.js @@ -0,0 +1,168 @@ +/** + * Web服务器脚本 + * 提供静态文件服务和API接口 + */ + +const http = require('http'); +const fs = require('fs'); +const path = require('path'); +const url = require('url'); + +const BASE_DIR = '/app/houseDream'; +// const BASE_DIR = '/Users/liyanyan/vps/tencet-ecs/app/houseDream'; +const WEB_DIR = path.join(BASE_DIR, 'web'); +const DATA_DIR = path.join(BASE_DIR, 'data'); +const PIC_DIR = path.join(BASE_DIR, 'pic'); + +const PORT = process.env.PORT || 8080; + +// MIME类型映射 +const mimeTypes = { + '.html': 'text/html', + '.css': 'text/css', + '.js': 'application/javascript', + '.json': 'application/json', + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.gif': 'image/gif', + 
'.ico': 'image/x-icon' +}; + +// 获取MIME类型 +function getMimeType(filePath) { + const ext = path.extname(filePath).toLowerCase(); + return mimeTypes[ext] || 'application/octet-stream'; +} + +// 读取文件 +function readFile(filePath) { + return new Promise((resolve, reject) => { + fs.readFile(filePath, (err, data) => { + if (err) reject(err); + else resolve(data); + }); + }); +} + +// 列出可用日期 +function listAvailableDates() { + try { + const files = fs.readdirSync(DATA_DIR); + return files + .filter(f => f.endsWith('.json') && !f.includes('_raw') && !f.includes('test')) + .map(f => f.replace('.json', '')) + .sort() + .reverse(); + } catch (err) { + return []; + } +} + +// 创建服务器 +const server = http.createServer(async (req, res) => { + const parsedUrl = url.parse(req.url, true); + let pathname = parsedUrl.pathname; + + // 设置CORS头 + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS'); + res.setHeader('Access-Control-Allow-Headers', 'Content-Type'); + + if (req.method === 'OPTIONS') { + res.writeHead(200); + res.end(); + return; + } + + try { + // API: 列出可用日期 + if (pathname === '/api/dates') { + const dates = listAvailableDates(); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ dates })); + return; + } + + // API: 获取指定日期数据 + if (pathname.startsWith('/api/data/')) { + const date = pathname.replace('/api/data/', ''); + const filePath = path.join(DATA_DIR, `${date}.json`); + + if (fs.existsSync(filePath)) { + const data = await readFile(filePath); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(data); + } else { + res.writeHead(404, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: '数据不存在' })); + } + return; + } + + // 静态文件服务 + if (pathname === '/') { + pathname = '/index.html'; + } + + // 处理 data 和 pic 路径 + let filePath; + if (pathname.startsWith('/data/')) { + filePath = path.join(DATA_DIR, pathname.replace('/data/', '')); 
+ } else if (pathname.startsWith('/pic/')) { + filePath = path.join(PIC_DIR, pathname.replace('/pic/', '')); + } else if (pathname.startsWith('/public/')) { + filePath = path.join(BASE_DIR, pathname); + } else { + filePath = path.join(WEB_DIR, pathname); + } + + // 安全检查:防止目录遍历 + if (!filePath.startsWith(BASE_DIR)) { + res.writeHead(403, { 'Content-Type': 'text/plain' }); + res.end('Forbidden'); + return; + } + + const data = await readFile(filePath); + const mimeType = getMimeType(filePath); + + res.writeHead(200, { 'Content-Type': mimeType }); + res.end(data); + + } catch (err) { + if (err.code === 'ENOENT') { + res.writeHead(404, { 'Content-Type': 'text/plain' }); + res.end('Not Found'); + } else { + console.error('服务器错误:', err); + res.writeHead(500, { 'Content-Type': 'text/plain' }); + res.end('Internal Server Error'); + } + } +}); + +// 启动服务器 +server.listen(PORT, 'localhost', () => { + console.log('=========================================='); + console.log('北京市房地产数据监控服务器已启动'); + console.log(`访问地址: http://localhost:${PORT}`); + console.log('按 Ctrl+C 停止服务器'); + console.log('=========================================='); +}); + +// 优雅关闭 +process.on('SIGTERM', () => { + console.log('\n正在关闭服务器...'); + server.close(() => { + console.log('服务器已关闭'); + process.exit(0); + }); +}); + +process.on('SIGINT', () => { + console.log('\n正在关闭服务器...'); + server.close(() => { + console.log('服务器已关闭'); + process.exit(0); + }); +}); diff --git a/web/app.js b/web/app.js new file mode 100644 index 0000000..a12dc12 --- /dev/null +++ b/web/app.js @@ -0,0 +1,224 @@ +// 房地产数据监控前端脚本 + +// 配置 +const CONFIG = { + apiBase: '/api', + dateFormat: /^(\d{4})-(\d{2})-(\d{2})$/ +}; + +// DOM元素 +const elements = { + datePicker: document.getElementById('datePicker'), + loadBtn: document.getElementById('loadBtn'), + todayBtn: document.getElementById('todayBtn'), + loading: document.getElementById('loading'), + error: document.getElementById('error'), + content: document.getElementById('content'), + 
dataDate: document.getElementById('dataDate'), + lastUpdate: document.getElementById('lastUpdate') +}; + +// 表格映射 +const tableMap = { + 'ksqf': 'kespTable', + 'ysxk': 'ysxkTable', + 'qfrg': 'qfrgTable', + 'qfqy': 'qfqyTable', + 'wyxf': 'wyqxTable', + 'xfxm': 'xfxmTable', + 'xfrg': 'xfrgTable', + 'xfqy': 'xfqyTable', + 'clf_month': 'clfyqyTable', + 'clf_day': 'clfrqyTable' +}; + +// 初始化 +function init() { + // 设置日期选择器默认值为今天 + const today = new Date().toISOString().split('T')[0]; + elements.datePicker.value = today; + + // 绑定事件 + elements.loadBtn.addEventListener('click', () => loadData(elements.datePicker.value)); + elements.todayBtn.addEventListener('click', () => { + elements.datePicker.value = today; + loadData(today); + }); + + // 加载今天的数据 + loadData(today); + + // 更新最后更新时间 + elements.lastUpdate.textContent = new Date().toLocaleString('zh-CN'); +} + +// 加载数据 +async function loadData(date) { + if (!date) { + showError('请选择日期'); + return; + } + + showLoading(); + + try { + const response = await fetch(`${CONFIG.apiBase}/data/${date}`); + + if (!response.ok) { + throw new Error(`未找到 ${date} 的数据`); + } + + const data = await response.json(); + + if (data.error) { + throw new Error(data.error); + } + + displayData(data); + showContent(); + } catch (err) { + showError(`加载失败: ${err.message}`); + } +} + +// 显示数据 +function displayData(data) { + elements.dataDate.textContent = data.date; + + // 商品房数据统计 + const spfData = data.data.spfsjtj; + for (const [key, value] of Object.entries(spfData)) { + const tableId = tableMap[key]; + if (tableId) { + renderSimpleTable(document.getElementById(tableId), value, rootDefined.spfsjtj.childMap[key].childMap); + } + } + + // 存量房月统计 + const clfData = data.data.clfwsqytj; + for (const [key, value] of Object.entries(clfData)) { + const tableId = tableMap[key]; + if (tableId) { + renderSimpleTable(document.getElementById(tableId), value, rootDefined.clfwsqytj.childMap[key].childMap); + } + } + + // 经纪机构表格 + const jjjgData = 
data.data.clfwdtj.broker; + renderBrokerTable(document.getElementById('jjjgTable'), jjjgData, rootDefined.clfwdtj.childMap.broker.childMap); + + // 按所在区县表格 + const districtData = data.data.clfwdtj.district; + renderBrokerTable(document.getElementById('szqxTable'), districtData, rootDefined.clfwdtj.childMap.district.childMap); + + // 按建筑面积 + const areaData = data.data.clfwdtj.area; + renderBrokerTable(document.getElementById('jzmjTable'), areaData, rootDefined.clfwdtj.childMap.area.childMap); + + // 加载截图 + loadScreenshot(data.date); +} + +// 加载截图 +function loadScreenshot(date) { + const container = document.getElementById('screenshotContainer'); + const img = new Image(); + img.src = `/pic/${date}.png`; + img.alt = '页面截图'; + img.className = 'screenshot-img'; + + img.onload = function() { + container.innerHTML = ''; + container.appendChild(img); + }; + + img.onerror = function() { + container.innerHTML = '

暂无截图

'; + }; +} + +// 渲染简单表格(键值对) +function renderSimpleTable(table, data, fieldMap) { + table.innerHTML = ''; + + if (!data || Object.keys(data).length === 0) { + table.innerHTML = '暂无数据'; + return; + } + + for (const item of Object.values(fieldMap)) { + const row = document.createElement('tr'); + row.innerHTML = ` + ${item.label} + ${data[item.key]} + `; + table.appendChild(row); + } +} + +// 渲染经纪机构表格 +function renderBrokerTable(table, data, fieldMap) { + table.innerHTML = ''; + + if (!data || data.length === 0) { + table.innerHTML = '暂无数据'; + return; + } + + // 表头 + const thead = document.createElement('thead'); + thead.innerHTML = ` + ${Object.values(fieldMap).map(item => `${item.label}`).join('')} + `; + table.appendChild(thead); + + // 表体 + const tbody = document.createElement('tbody'); + data.forEach(item => { + console.log('渲染经纪机构表格数据:', item); + const row = document.createElement('tr'); + row.innerHTML = Object.values(fieldMap).map(field => `${item[field.key]}`).join(''); + tbody.appendChild(row); + }); + table.appendChild(tbody); +} + +// 格式化数字 +function formatNumber(num) { + if (num === null || num === undefined) return '-'; + const n = parseFloat(num); + if (isNaN(n)) return num; + + // 如果是整数,添加千分位 + if (Number.isInteger(n)) { + return n.toLocaleString('zh-CN'); + } + + // 保留2位小数 + return n.toLocaleString('zh-CN', { minimumFractionDigits: 2, maximumFractionDigits: 2 }); +} + +// 显示加载状态 +function showLoading() { + elements.loading.classList.remove('hidden'); + elements.error.classList.add('hidden'); + elements.content.classList.add('hidden'); +} + +// 显示错误 +function showError(message) { + elements.loading.classList.add('hidden'); + elements.error.textContent = message; + elements.error.classList.remove('hidden'); + elements.content.classList.add('hidden'); +} + +// 显示内容 +function showContent() { + elements.loading.classList.add('hidden'); + elements.error.classList.add('hidden'); + elements.content.classList.remove('hidden'); +} + +// 启动 
+document.addEventListener('DOMContentLoaded', init); diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..9261a3a --- /dev/null +++ b/web/index.html @@ -0,0 +1,138 @@ + + + + + + 北京市房地产数据监控 + + + +
+
+

🏠 北京市房地产数据监控

+

数据来源:北京市住房和城乡建设委员会

+
+ +
+ + + + +
+ + + + + + +
+

最后更新:

+
+
+ + + + + diff --git a/web/style.css b/web/style.css new file mode 100644 index 0000000..66d3dbb --- /dev/null +++ b/web/style.css @@ -0,0 +1,264 @@ +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', sans-serif; + background: #f5f7fa; + color: #333; + line-height: 1.6; +} + +.container { + max-width: 1400px; + margin: 0 auto; + padding: 20px; +} + +header { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + padding: 30px; + border-radius: 12px; + margin-bottom: 20px; + text-align: center; +} + +header h1 { + font-size: 28px; + margin-bottom: 10px; +} + +header p { + opacity: 0.9; + font-size: 14px; +} + +.controls { + background: white; + padding: 20px; + border-radius: 12px; + margin-bottom: 20px; + box-shadow: 0 2px 8px rgba(0,0,0,0.1); + display: flex; + align-items: center; + gap: 15px; + flex-wrap: wrap; +} + +.controls label { + font-weight: 500; +} + +.controls input[type="date"] { + padding: 8px 12px; + border: 1px solid #ddd; + border-radius: 6px; + font-size: 14px; +} + +.controls button { + padding: 8px 20px; + border: none; + border-radius: 6px; + cursor: pointer; + font-size: 14px; + transition: all 0.3s; +} + +#loadBtn { + background: #667eea; + color: white; +} + +#loadBtn:hover { + background: #5a6fd6; +} + +#todayBtn { + background: #f0f0f0; + color: #333; +} + +#todayBtn:hover { + background: #e0e0e0; +} + +.loading, .error { + text-align: center; + padding: 40px; + background: white; + border-radius: 12px; + margin-bottom: 20px; +} + +.loading { + color: #667eea; +} + +.error { + color: #e74c3c; + background: #fdf2f2; +} + +.hidden { + display: none !important; +} + +.content { + animation: fadeIn 0.3s ease; +} + +@keyframes fadeIn { + from { opacity: 0; transform: translateY(10px); } + to { opacity: 1; transform: translateY(0); } +} + +.date-info { + background: white; + padding: 
15px 20px;
    border-radius: 12px;
    margin-bottom: 20px;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

.date-info h2 {
    font-size: 18px;
    color: #667eea;
}

.data-section {
    margin-bottom: 30px;
}

.data-section h3 {
    font-size: 20px;
    margin-bottom: 15px;
    color: #333;
    padding-left: 10px;
    border-left: 4px solid #667eea;
}

.module-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
    gap: 20px;
}

.module-card {
    background: white;
    padding: 20px;
    border-radius: 12px;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    transition: transform 0.2s, box-shadow 0.2s;
}

.module-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 16px rgba(0,0,0,0.15);
}

.module-card.full-width {
    grid-column: 1 / -1;
}

.module-card h4 {
    font-size: 16px;
    margin-bottom: 15px;
    color: #555;
    padding-bottom: 10px;
    border-bottom: 1px solid #eee;
}

.data-table {
    width: 100%;
    border-collapse: collapse;
    font-size: 14px;
}

.data-table tr {
    border-bottom: 1px solid #f0f0f0;
}

.data-table tr:last-child {
    border-bottom: none;
}

.data-table td {
    padding: 10px 5px;
}

.data-table td:first-child {
    color: #666;
    width: 60%;
}

.data-table td:last-child {
    text-align: right;
    font-weight: 500;
    color: #667eea;
}

/* List tables (broker, district, floor area).
   The script builds all three with the same header row, so header and hover
   styling is shared; body-cell layout below stays specific to the broker table. */
#jjjgTable {
    font-size: 13px;
}

#jjjgTable th,
#jjjgTable td,
#szqxTable th,
#jzmjTable th {
    padding: 12px;
    text-align: left;
    border-bottom: 1px solid #eee;
}

#jjjgTable th,
#szqxTable th,
#jzmjTable th {
    background: #f8f9fa;
    font-weight: 600;
    color: #555;
}

#jjjgTable tr:hover,
#szqxTable tr:hover,
#jzmjTable tr:hover {
    background: #f8f9fa;
}

#jjjgTable td:nth-child(3),
#jjjgTable td:nth-child(4) {
    text-align: right;
}

/* Screenshot */
#screenshotContainer {
    text-align: center;
    padding: 10px;
}

.screenshot-img {
    max-width: 100%;
    height: auto;
    border-radius: 8px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}

footer {
    text-align: center;
    padding: 20px;
    color: #999;
    font-size: 13px;
}

/* Narrow screens: single-column cards and stacked controls. */
@media (max-width: 768px) {
    .module-grid {
        grid-template-columns: 1fr;
    }

    .controls {
        flex-direction: column;
        align-items: stretch;
    }

    header h1 {
        font-size: 22px;
    }
}