fix(M3): 改进 HTML 解析去除标签,优化面板样式

This commit is contained in:
李岩岩 2026-02-11 15:23:29 +08:00
parent a83d5d7bb9
commit 8bb5b2592d
3 changed files with 284 additions and 187 deletions

View File

@ -55,7 +55,7 @@ export class DictPanel {
.header {
padding: 12px 16px;
background-color: #4CAF50;
background-color: #5DBE8C;
color: white;
font-size: 16px;
font-weight: 500;
@ -65,7 +65,7 @@ export class DictPanel {
}
.header:hover {
background-color: #45a049;
background-color: #4da87a;
}
.content {
@ -75,6 +75,7 @@ export class DictPanel {
font-size: 14px;
line-height: 1.6;
color: #333;
background: #f5f5f5;
}
.placeholder {
@ -83,11 +84,16 @@ export class DictPanel {
margin-top: 100px;
}
/* 单词标题 */
.word-title {
font-size: 28px;
font-size: 32px;
font-weight: bold;
color: #333;
margin-bottom: 4px;
background: #fff;
padding: 12px 16px;
border-radius: 8px;
margin: -16px -16px 16px -16px;
}
.phonetic {
@ -97,125 +103,46 @@ export class DictPanel {
font-family: 'Times New Roman', serif;
}
.meanings-section {
margin-bottom: 16px;
}
.section-title {
font-size: 12px;
font-weight: bold;
color: #4CAF50;
margin-bottom: 8px;
text-transform: uppercase;
}
.meaning-item {
margin-bottom: 8px;
}
.part-of-speech {
color: #2196F3;
font-weight: 500;
margin-right: 8px;
}
.definition {
color: #333;
}
.examples-section {
margin-top: 16px;
}
.example-item {
/* 词典区块 */
.dict-section {
margin-bottom: 12px;
padding: 8px;
background: #f5f5f5;
border-radius: 4px;
}
.example-sentence {
color: #333;
margin-bottom: 4px;
}
.example-translation {
color: #666;
font-size: 13px;
}
.loading {
text-align: center;
padding: 40px 20px;
color: #666;
}
.loading-spinner {
width: 32px;
height: 32px;
border: 3px solid #e0e0e0;
border-top-color: #4CAF50;
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin: 0 auto 16px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading-text {
font-size: 14px;
}
.dict-section {
margin-bottom: 16px;
padding-bottom: 16px;
border-bottom: 1px solid #eee;
}
.dict-section:last-child {
border-bottom: none;
}
.dict-section {
margin-bottom: 8px;
border: 1px solid #e0e0e0;
border-radius: 6px;
background: #fff;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.dict-header {
display: flex;
align-items: center;
padding: 10px 12px;
background: #f8f8f8;
background: #fff;
cursor: pointer;
user-select: none;
transition: background 0.2s;
border-bottom: 1px solid #f0f0f0;
}
.dict-header:hover {
background: #f0f0f0;
background: #fafafa;
}
.dict-toggle {
font-size: 10px;
margin-right: 6px;
color: #666;
margin-right: 8px;
color: #999;
width: 12px;
text-align: center;
}
.dict-icon {
width: 16px;
height: 16px;
width: 18px;
height: 18px;
border-radius: 3px;
margin-right: 8px;
display: flex;
align-items: center;
justify-content: center;
font-size: 10px;
font-size: 11px;
font-weight: bold;
color: white;
flex-shrink: 0;
@ -234,19 +161,103 @@ export class DictPanel {
}
.dict-name {
font-size: 13px;
font-size: 14px;
font-weight: 600;
color: #333;
flex: 1;
}
.dict-content {
padding: 12px;
padding: 12px 16px;
display: block;
}
.dict-content.collapsed {
display: none;
}
/* 释义 */
.meanings-section {
margin-bottom: 12px;
}
.section-title {
font-size: 12px;
font-weight: 600;
color: #4CAF50;
margin-bottom: 8px;
}
.meaning-item {
margin-bottom: 6px;
font-size: 14px;
}
.part-of-speech {
color: #2196F3;
font-weight: 500;
margin-right: 8px;
}
.definition {
color: #333;
}
/* 例句 */
.examples-section {
margin-top: 12px;
}
.example-item {
margin-bottom: 10px;
padding: 10px 12px;
background: #f8f8f8;
border-radius: 6px;
}
.example-number {
color: #999;
margin-right: 6px;
}
.example-sentence {
color: #333;
margin-bottom: 4px;
line-height: 1.5;
}
.example-translation {
color: #666;
font-size: 13px;
line-height: 1.5;
}
/* 加载状态 */
.loading {
text-align: center;
padding: 40px 20px;
color: #666;
background: #fff;
border-radius: 8px;
}
.loading-spinner {
width: 32px;
height: 32px;
border: 3px solid #e0e0e0;
border-top-color: #5DBE8C;
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin: 0 auto 16px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading-text {
font-size: 14px;
}
</style>
<div class="panel">
<div class="header">词典结果</div>
@ -614,9 +625,12 @@ export class DictPanel {
// 最多显示2个例句
const displayExamples = examples.slice(0, 2);
const examplesHtml = displayExamples.map(e => `
const examplesHtml = displayExamples.map((e, index) => `
<div class="example-item">
<div class="example-sentence">${this._escapeHtml(e.sentence || '')}</div>
<div class="example-sentence">
<span class="example-number">${index + 1}.</span>
${this._escapeHtml(e.sentence || '')}
</div>
${e.translation ? `<div class="example-translation">${this._escapeHtml(e.translation)}</div>` : ''}
</div>
`).join('');

View File

@ -46,7 +46,6 @@ export class BingDictionary extends DictionaryBase {
const html = await response.text();
// 使用正则提取数据
return this._parseHtml(html, trimmedWord, url);
} catch (error) {
console.error('[BingDictionary] Search failed:', error);
@ -62,7 +61,17 @@ export class BingDictionary extends DictionaryBase {
}
/**
* 解析必应词典 HTML使用正则
* 去除 HTML 标签
* @private
* @param {string} html - 包含 HTML 的字符串
* @returns {string} 纯文本
*/
_stripHtml(html) {
return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
}
/**
* 解析必应词典 HTML
* @private
* @param {string} html - HTML 内容
* @param {string} word - 查询的单词
@ -86,16 +95,24 @@ export class BingDictionary extends DictionaryBase {
* @returns {string} 音标
*/
_extractPhonetic(html) {
// 匹配音标格式如 [həˈləʊ] 或 /həˈləʊ/
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
// 必应词典音标通常在 hd_pr 或 hd_tf 类中
const patterns = [
/<span[^>]*class="[^"]*hd_pr[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
/<span[^>]*class="[^"]*hd_tf[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
/<span[^>]*class="[^"]*pron[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i
];
for (const pattern of patterns) {
const match = html.match(pattern);
if (match) {
return match[0];
return match[1].trim();
}
}
// 备选:匹配 /.../ 格式
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
if (match2) {
return match2[0];
// 通用音标匹配
const genericMatch = html.match(/(\[[\u0250-\u02AEˈˌa-zA-Z\s]+\])/);
if (genericMatch) {
return genericMatch[1];
}
return '';
@ -109,38 +126,47 @@ export class BingDictionary extends DictionaryBase {
*/
_extractMeanings(html) {
const meanings = [];
// 尝试匹配常见的词典释义格式
// 格式1: <span class="pos">n.</span><span class="def">定义</span>
const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi;
let match;
while ((match = posDefPattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
if (partOfSpeech && definition) {
meanings.push(createMeaning(partOfSpeech, [definition]));
}
}
// 格式2: 直接匹配 "词性. 释义" 格式
if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
const seen = new Set();
while ((match = simplePattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
// 必应词典释义结构:
// <li><span class="pos">n.</span> <span class="def">释义</span></li>
// 或者:
// <div class="df_div">词性. 释义</div>
// 过滤无效结果
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
if (definition.length < 2 || definition.length > 30) continue;
if (seen.has(definition)) continue;
// 模式1: 标准词典释义格式
const defPattern = /<li[^>]*>\s*<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>\s*<span[^>]*class="[^"]*def[^"]*"[^>]*>([^<]+)<\/span>/gi;
seen.add(definition);
meanings.push(createMeaning(partOfSpeech, [definition]));
let match;
while ((match = defPattern.exec(html)) !== null) {
const pos = this._stripHtml(match[1]).trim();
const def = this._stripHtml(match[2]).trim();
if (meanings.length >= 5) break;
if (pos && def && !seen.has(`${pos}-${def}`)) {
seen.add(`${pos}-${def}`);
meanings.push(createMeaning(pos, [def]));
}
}
// 模式2: 备选释义格式
if (meanings.length === 0) {
const altPattern = /<div[^>]*class="[^"]*(?:qdef|def|meaning)[^"]*"[^>]*>(.*?)<\/div>/gi;
while ((match = altPattern.exec(html)) !== null) {
const content = match[1];
// 提取词性和释义
const posMatches = content.matchAll(/<span[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/span>/gi);
const defMatches = content.matchAll(/<span[^>]*class="[^"]*def[^"]*"[^>]*>([^<]+)<\/span>/gi);
const poses = [...posMatches].map(m => this._stripHtml(m[1]).trim()).filter(Boolean);
const defs = [...defMatches].map(m => this._stripHtml(m[1]).trim()).filter(Boolean);
for (let i = 0; i < Math.min(poses.length, defs.length); i++) {
if (!seen.has(`${poses[i]}-${defs[i]}`)) {
seen.add(`${poses[i]}-${defs[i]}`);
meanings.push(createMeaning(poses[i], [defs[i]]));
}
}
}
}
@ -155,25 +181,36 @@ export class BingDictionary extends DictionaryBase {
*/
_extractExamples(html) {
const examples = [];
// 匹配例句模式:英文句子后跟中文翻译
// 尝试匹配 <li> 或 <div> 中的例句
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
let match;
const seen = new Set();
while ((match = sentencePattern.exec(html)) !== null) {
const sentence = match[1].trim();
const translation = match[2].trim();
// 必应例句格式:
// <div class="sen_li">
// <div class="sen_en">英文例句</div>
// <div class="sen_cn">中文翻译</div>
// </div>
if (seen.has(sentence)) continue;
// 提取所有例句块
const senPattern = /<div[^>]*class="[^"]*sen_li[^"]*"[^>]*>(.*?)<\/div>/gi;
const senMatches = [...html.matchAll(senPattern)];
for (const senMatch of senMatches) {
const senBlock = senMatch[1];
// 提取英文
const enMatch = senBlock.match(/<div[^>]*class="[^"]*sen_en[^"]*"[^>]*>(.*?)<\/div>/i);
// 提取中文
const cnMatch = senBlock.match(/<div[^>]*class="[^"]*sen_cn[^"]*"[^>]*>(.*?)<\/div>/i);
if (enMatch) {
const sentence = this._stripHtml(enMatch[1]).trim();
const translation = cnMatch ? this._stripHtml(cnMatch[1]).trim() : '';
// 过滤无效结果
if (sentence && sentence.length > 5 && !sentence.includes('<') && !seen.has(sentence)) {
seen.add(sentence);
// 验证:英文句子应该包含空格且长度合适
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
examples.push(createExample(sentence, translation));
}
}
if (examples.length >= 2) break;
}

View File

@ -46,7 +46,6 @@ export class YoudaoDictionary extends DictionaryBase {
const html = await response.text();
// 使用正则提取数据
return this._parseHtml(html, trimmedWord, url);
} catch (error) {
console.error('[YoudaoDictionary] Search failed:', error);
@ -62,7 +61,17 @@ export class YoudaoDictionary extends DictionaryBase {
}
/**
* 解析有道词典 HTML使用正则
* 去除 HTML 标签
* @private
* @param {string} html - 包含 HTML 的字符串
* @returns {string} 纯文本
*/
_stripHtml(html) {
return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
}
/**
* 解析有道词典 HTML
* @private
* @param {string} html - HTML 内容
* @param {string} word - 查询的单词
@ -86,15 +95,24 @@ export class YoudaoDictionary extends DictionaryBase {
* @returns {string} 音标
*/
_extractPhonetic(html) {
// 匹配音标格式
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
// 有道词典音标通常在 phonetic 或 pron 类中
const patterns = [
/<span[^>]*class="[^"]*phonetic[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
/<span[^>]*class="[^"]*pron[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
/<span[^>]*class="[^"]*phone[^"]*"[^>]*>([^<]+)<\/span>/i
];
for (const pattern of patterns) {
const match = html.match(pattern);
if (match) {
return match[0];
return match[1].trim();
}
}
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
if (match2) {
return match2[0];
// 通用音标匹配
const genericMatch = html.match(/(\[[\u0250-\u02AEˈˌa-zA-Z\s]+\])/);
if (genericMatch) {
return genericMatch[1];
}
return '';
@ -108,36 +126,53 @@ export class YoudaoDictionary extends DictionaryBase {
*/
_extractMeanings(html) {
const meanings = [];
// 尝试匹配常见的词典释义格式
const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi;
let match;
while ((match = posDefPattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
if (partOfSpeech && definition) {
meanings.push(createMeaning(partOfSpeech, [definition]));
}
}
// 备选:直接匹配 "词性. 释义" 格式
if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
const seen = new Set();
while ((match = simplePattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
// 有道词典释义结构:
// <li><span class="pos">n.</span> <span class="trans">释义</span></li>
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
if (definition.length < 2 || definition.length > 30) continue;
if (seen.has(definition)) continue;
// 模式1: 标准释义格式
const defPattern = /<li[^>]*>\s*<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>\s*<span[^>]*class="[^"]*trans[^"]*"[^>]*>([^<]+)<\/span>/gi;
seen.add(definition);
meanings.push(createMeaning(partOfSpeech, [definition]));
let match;
while ((match = defPattern.exec(html)) !== null) {
const pos = this._stripHtml(match[1]).trim();
const def = this._stripHtml(match[2]).trim();
if (meanings.length >= 5) break;
if (pos && def && !seen.has(`${pos}-${def}`)) {
seen.add(`${pos}-${def}`);
meanings.push(createMeaning(pos, [def]));
}
}
// 模式2: 备选释义格式 (trans-container 中的释义)
if (meanings.length === 0) {
const containerPattern = /<div[^>]*class="[^"]*trans-container[^"]*"[^>]*>(.*?)<\/ul>/i;
const containerMatch = html.match(containerPattern);
if (containerMatch) {
const container = containerMatch[1];
// 提取所有 li 项
const liPattern = /<li[^>]*>(.*?)<\/li>/gi;
const liMatches = [...container.matchAll(liPattern)];
for (const liMatch of liMatches) {
const content = liMatch[1];
// 提取词性
const posMatch = content.match(/<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>/i);
// 提取释义
const transMatch = content.match(/<span[^>]*class="[^"]*(?:trans|chn)[^"]*"[^>]*>([^<]+)<\/span>/i);
const pos = posMatch ? this._stripHtml(posMatch[1]).trim() : 'n.';
const def = transMatch ? this._stripHtml(transMatch[1]).trim() : this._stripHtml(content);
if (def && !seen.has(`${pos}-${def}`)) {
seen.add(`${pos}-${def}`);
meanings.push(createMeaning(pos, [def]));
}
}
}
}
@ -152,23 +187,34 @@ export class YoudaoDictionary extends DictionaryBase {
*/
_extractExamples(html) {
const examples = [];
// 匹配例句模式
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
let match;
const seen = new Set();
while ((match = sentencePattern.exec(html)) !== null) {
const sentence = match[1].trim();
const translation = match[2].trim();
// 有道例句格式:
// <li class="examples_li">
// <p class="examples_p">英文例句</p>
// <p class="examples_p">中文翻译</p>
// </li>
if (seen.has(sentence)) continue;
// 提取例句块
const senPattern = /<li[^>]*class="[^"]*examples_li[^"]*"[^>]*>(.*?)<\/li>/gi;
const senMatches = [...html.matchAll(senPattern)];
for (const senMatch of senMatches) {
const senBlock = senMatch[1];
// 提取所有 p 标签内容
const pPattern = /<p[^>]*class="[^"]*examples_p[^"]*"[^>]*>(.*?)<\/p>/gi;
const pMatches = [...senBlock.matchAll(pPattern)];
if (pMatches.length >= 2) {
const sentence = this._stripHtml(pMatches[0][1]).trim();
const translation = this._stripHtml(pMatches[1][1]).trim();
if (sentence && !sentence.includes('<') && !seen.has(sentence)) {
seen.add(sentence);
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
examples.push(createExample(sentence, translation));
}
}
if (examples.length >= 2) break;
}