fix(M3): 改进 HTML 解析去除标签,优化面板样式

This commit is contained in:
李岩岩 2026-02-11 15:23:29 +08:00
parent a83d5d7bb9
commit 8bb5b2592d
3 changed files with 284 additions and 187 deletions

View File

@ -55,7 +55,7 @@ export class DictPanel {
.header { .header {
padding: 12px 16px; padding: 12px 16px;
background-color: #4CAF50; background-color: #5DBE8C;
color: white; color: white;
font-size: 16px; font-size: 16px;
font-weight: 500; font-weight: 500;
@ -65,7 +65,7 @@ export class DictPanel {
} }
.header:hover { .header:hover {
background-color: #45a049; background-color: #4da87a;
} }
.content { .content {
@ -75,6 +75,7 @@ export class DictPanel {
font-size: 14px; font-size: 14px;
line-height: 1.6; line-height: 1.6;
color: #333; color: #333;
background: #f5f5f5;
} }
.placeholder { .placeholder {
@ -83,11 +84,16 @@ export class DictPanel {
margin-top: 100px; margin-top: 100px;
} }
/* 单词标题 */
.word-title { .word-title {
font-size: 28px; font-size: 32px;
font-weight: bold; font-weight: bold;
color: #333; color: #333;
margin-bottom: 4px; margin-bottom: 4px;
background: #fff;
padding: 12px 16px;
border-radius: 8px;
margin: -16px -16px 16px -16px;
} }
.phonetic { .phonetic {
@ -97,125 +103,46 @@ export class DictPanel {
font-family: 'Times New Roman', serif; font-family: 'Times New Roman', serif;
} }
.meanings-section { /* 词典区块 */
margin-bottom: 16px; .dict-section {
}
.section-title {
font-size: 12px;
font-weight: bold;
color: #4CAF50;
margin-bottom: 8px;
text-transform: uppercase;
}
.meaning-item {
margin-bottom: 8px;
}
.part-of-speech {
color: #2196F3;
font-weight: 500;
margin-right: 8px;
}
.definition {
color: #333;
}
.examples-section {
margin-top: 16px;
}
.example-item {
margin-bottom: 12px; margin-bottom: 12px;
padding: 8px; background: #fff;
background: #f5f5f5; border-radius: 8px;
border-radius: 4px;
}
.example-sentence {
color: #333;
margin-bottom: 4px;
}
.example-translation {
color: #666;
font-size: 13px;
}
.loading {
text-align: center;
padding: 40px 20px;
color: #666;
}
.loading-spinner {
width: 32px;
height: 32px;
border: 3px solid #e0e0e0;
border-top-color: #4CAF50;
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin: 0 auto 16px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading-text {
font-size: 14px;
}
.dict-section {
margin-bottom: 16px;
padding-bottom: 16px;
border-bottom: 1px solid #eee;
}
.dict-section:last-child {
border-bottom: none;
}
.dict-section {
margin-bottom: 8px;
border: 1px solid #e0e0e0;
border-radius: 6px;
overflow: hidden; overflow: hidden;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
} }
.dict-header { .dict-header {
display: flex; display: flex;
align-items: center; align-items: center;
padding: 10px 12px; padding: 10px 12px;
background: #f8f8f8; background: #fff;
cursor: pointer; cursor: pointer;
user-select: none; user-select: none;
transition: background 0.2s; border-bottom: 1px solid #f0f0f0;
} }
.dict-header:hover { .dict-header:hover {
background: #f0f0f0; background: #fafafa;
} }
.dict-toggle { .dict-toggle {
font-size: 10px; font-size: 10px;
margin-right: 6px; margin-right: 8px;
color: #666; color: #999;
width: 12px; width: 12px;
text-align: center; text-align: center;
} }
.dict-icon { .dict-icon {
width: 16px; width: 18px;
height: 16px; height: 18px;
border-radius: 3px; border-radius: 3px;
margin-right: 8px; margin-right: 8px;
display: flex; display: flex;
align-items: center; align-items: center;
justify-content: center; justify-content: center;
font-size: 10px; font-size: 11px;
font-weight: bold; font-weight: bold;
color: white; color: white;
flex-shrink: 0; flex-shrink: 0;
@ -234,19 +161,103 @@ export class DictPanel {
} }
.dict-name { .dict-name {
font-size: 13px; font-size: 14px;
font-weight: 600; font-weight: 600;
color: #333; color: #333;
flex: 1;
} }
.dict-content { .dict-content {
padding: 12px; padding: 12px 16px;
display: block; display: block;
} }
.dict-content.collapsed { .dict-content.collapsed {
display: none; display: none;
} }
/* 释义 */
.meanings-section {
margin-bottom: 12px;
}
.section-title {
font-size: 12px;
font-weight: 600;
color: #4CAF50;
margin-bottom: 8px;
}
.meaning-item {
margin-bottom: 6px;
font-size: 14px;
}
.part-of-speech {
color: #2196F3;
font-weight: 500;
margin-right: 8px;
}
.definition {
color: #333;
}
/* 例句 */
.examples-section {
margin-top: 12px;
}
.example-item {
margin-bottom: 10px;
padding: 10px 12px;
background: #f8f8f8;
border-radius: 6px;
}
.example-number {
color: #999;
margin-right: 6px;
}
.example-sentence {
color: #333;
margin-bottom: 4px;
line-height: 1.5;
}
.example-translation {
color: #666;
font-size: 13px;
line-height: 1.5;
}
/* 加载状态 */
.loading {
text-align: center;
padding: 40px 20px;
color: #666;
background: #fff;
border-radius: 8px;
}
.loading-spinner {
width: 32px;
height: 32px;
border: 3px solid #e0e0e0;
border-top-color: #5DBE8C;
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin: 0 auto 16px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading-text {
font-size: 14px;
}
</style> </style>
<div class="panel"> <div class="panel">
<div class="header">词典结果</div> <div class="header">词典结果</div>
@ -614,9 +625,12 @@ export class DictPanel {
// 最多显示2个例句 // 最多显示2个例句
const displayExamples = examples.slice(0, 2); const displayExamples = examples.slice(0, 2);
const examplesHtml = displayExamples.map(e => ` const examplesHtml = displayExamples.map((e, index) => `
<div class="example-item"> <div class="example-item">
<div class="example-sentence">${this._escapeHtml(e.sentence || '')}</div> <div class="example-sentence">
<span class="example-number">${index + 1}.</span>
${this._escapeHtml(e.sentence || '')}
</div>
${e.translation ? `<div class="example-translation">${this._escapeHtml(e.translation)}</div>` : ''} ${e.translation ? `<div class="example-translation">${this._escapeHtml(e.translation)}</div>` : ''}
</div> </div>
`).join(''); `).join('');

View File

@ -46,7 +46,6 @@ export class BingDictionary extends DictionaryBase {
const html = await response.text(); const html = await response.text();
// 使用正则提取数据
return this._parseHtml(html, trimmedWord, url); return this._parseHtml(html, trimmedWord, url);
} catch (error) { } catch (error) {
console.error('[BingDictionary] Search failed:', error); console.error('[BingDictionary] Search failed:', error);
@ -62,7 +61,17 @@ export class BingDictionary extends DictionaryBase {
} }
/** /**
* 解析必应词典 HTML使用正则 * 去除 HTML 标签
* @private
* @param {string} html - 包含 HTML 的字符串
* @returns {string} 纯文本
*/
_stripHtml(html) {
return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
}
/**
* 解析必应词典 HTML
* @private * @private
* @param {string} html - HTML 内容 * @param {string} html - HTML 内容
* @param {string} word - 查询的单词 * @param {string} word - 查询的单词
@ -86,16 +95,24 @@ export class BingDictionary extends DictionaryBase {
* @returns {string} 音标 * @returns {string} 音标
*/ */
_extractPhonetic(html) { _extractPhonetic(html) {
// 匹配音标格式如 [həˈləʊ] 或 /həˈləʊ/ // 必应词典音标通常在 hd_pr 或 hd_tf 类中
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/); const patterns = [
if (match) { /<span[^>]*class="[^"]*hd_pr[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
return match[0]; /<span[^>]*class="[^"]*hd_tf[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
/<span[^>]*class="[^"]*pron[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i
];
for (const pattern of patterns) {
const match = html.match(pattern);
if (match) {
return match[1].trim();
}
} }
// 备选:匹配 /.../ 格式 // 通用音标匹配
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//); const genericMatch = html.match(/(\[[\u0250-\u02AEˈˌa-zA-Z\s]+\])/);
if (match2) { if (genericMatch) {
return match2[0]; return genericMatch[1];
} }
return ''; return '';
@ -109,38 +126,47 @@ export class BingDictionary extends DictionaryBase {
*/ */
_extractMeanings(html) { _extractMeanings(html) {
const meanings = []; const meanings = [];
const seen = new Set();
// 尝试匹配常见的词典释义格式 // 必应词典释义结构:
// 格式1: <span class="pos">n.</span><span class="def">定义</span> // <li><span class="pos">n.</span> <span class="def">释义</span></li>
const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi; // 或者:
// <div class="df_div">词性. 释义</div>
// 模式1: 标准词典释义格式
const defPattern = /<li[^>]*>\s*<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>\s*<span[^>]*class="[^"]*def[^"]*"[^>]*>([^<]+)<\/span>/gi;
let match; let match;
while ((match = posDefPattern.exec(html)) !== null) { while ((match = defPattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim(); const pos = this._stripHtml(match[1]).trim();
const definition = match[2].trim(); const def = this._stripHtml(match[2]).trim();
if (partOfSpeech && definition) {
meanings.push(createMeaning(partOfSpeech, [definition])); if (pos && def && !seen.has(`${pos}-${def}`)) {
seen.add(`${pos}-${def}`);
meanings.push(createMeaning(pos, [def]));
} }
} }
// 格式2: 直接匹配 "词性. 释义" 格式 // 模式2: 备选释义格式
if (meanings.length === 0) { if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi; const altPattern = /<div[^>]*class="[^"]*(?:qdef|def|meaning)[^"]*"[^>]*>(.*?)<\/div>/gi;
const seen = new Set();
while ((match = simplePattern.exec(html)) !== null) { while ((match = altPattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim(); const content = match[1];
const definition = match[2].trim();
// 过滤无效结果 // 提取词性和释义
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue; const posMatches = content.matchAll(/<span[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/span>/gi);
if (definition.length < 2 || definition.length > 30) continue; const defMatches = content.matchAll(/<span[^>]*class="[^"]*def[^"]*"[^>]*>([^<]+)<\/span>/gi);
if (seen.has(definition)) continue;
seen.add(definition); const poses = [...posMatches].map(m => this._stripHtml(m[1]).trim()).filter(Boolean);
meanings.push(createMeaning(partOfSpeech, [definition])); const defs = [...defMatches].map(m => this._stripHtml(m[1]).trim()).filter(Boolean);
if (meanings.length >= 5) break; for (let i = 0; i < Math.min(poses.length, defs.length); i++) {
if (!seen.has(`${poses[i]}-${defs[i]}`)) {
seen.add(`${poses[i]}-${defs[i]}`);
meanings.push(createMeaning(poses[i], [defs[i]]));
}
}
} }
} }
@ -155,24 +181,35 @@ export class BingDictionary extends DictionaryBase {
*/ */
_extractExamples(html) { _extractExamples(html) {
const examples = []; const examples = [];
// 匹配例句模式:英文句子后跟中文翻译
// 尝试匹配 <li> 或 <div> 中的例句
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
let match;
const seen = new Set(); const seen = new Set();
while ((match = sentencePattern.exec(html)) !== null) { // 必应例句格式:
const sentence = match[1].trim(); // <div class="sen_li">
const translation = match[2].trim(); // <div class="sen_en">英文例句</div>
// <div class="sen_cn">中文翻译</div>
// </div>
if (seen.has(sentence)) continue; // 提取所有例句块
seen.add(sentence); const senPattern = /<div[^>]*class="[^"]*sen_li[^"]*"[^>]*>(.*?)<\/div>/gi;
const senMatches = [...html.matchAll(senPattern)];
// 验证:英文句子应该包含空格且长度合适 for (const senMatch of senMatches) {
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) { const senBlock = senMatch[1];
examples.push(createExample(sentence, translation));
// 提取英文
const enMatch = senBlock.match(/<div[^>]*class="[^"]*sen_en[^"]*"[^>]*>(.*?)<\/div>/i);
// 提取中文
const cnMatch = senBlock.match(/<div[^>]*class="[^"]*sen_cn[^"]*"[^>]*>(.*?)<\/div>/i);
if (enMatch) {
const sentence = this._stripHtml(enMatch[1]).trim();
const translation = cnMatch ? this._stripHtml(cnMatch[1]).trim() : '';
// 过滤无效结果
if (sentence && sentence.length > 5 && !sentence.includes('<') && !seen.has(sentence)) {
seen.add(sentence);
examples.push(createExample(sentence, translation));
}
} }
if (examples.length >= 2) break; if (examples.length >= 2) break;

View File

@ -46,7 +46,6 @@ export class YoudaoDictionary extends DictionaryBase {
const html = await response.text(); const html = await response.text();
// 使用正则提取数据
return this._parseHtml(html, trimmedWord, url); return this._parseHtml(html, trimmedWord, url);
} catch (error) { } catch (error) {
console.error('[YoudaoDictionary] Search failed:', error); console.error('[YoudaoDictionary] Search failed:', error);
@ -62,7 +61,17 @@ export class YoudaoDictionary extends DictionaryBase {
} }
/** /**
* 解析有道词典 HTML使用正则 * 去除 HTML 标签
* @private
* @param {string} html - 包含 HTML 的字符串
* @returns {string} 纯文本
*/
_stripHtml(html) {
return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
}
/**
* 解析有道词典 HTML
* @private * @private
* @param {string} html - HTML 内容 * @param {string} html - HTML 内容
* @param {string} word - 查询的单词 * @param {string} word - 查询的单词
@ -86,15 +95,24 @@ export class YoudaoDictionary extends DictionaryBase {
* @returns {string} 音标 * @returns {string} 音标
*/ */
_extractPhonetic(html) { _extractPhonetic(html) {
// 匹配音标格式 // 有道词典音标通常在 phonetic 或 pron 类中
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/); const patterns = [
if (match) { /<span[^>]*class="[^"]*phonetic[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
return match[0]; /<span[^>]*class="[^"]*pron[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
/<span[^>]*class="[^"]*phone[^"]*"[^>]*>([^<]+)<\/span>/i
];
for (const pattern of patterns) {
const match = html.match(pattern);
if (match) {
return match[1].trim();
}
} }
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//); // 通用音标匹配
if (match2) { const genericMatch = html.match(/(\[[\u0250-\u02AEˈˌa-zA-Z\s]+\])/);
return match2[0]; if (genericMatch) {
return genericMatch[1];
} }
return ''; return '';
@ -108,36 +126,53 @@ export class YoudaoDictionary extends DictionaryBase {
*/ */
_extractMeanings(html) { _extractMeanings(html) {
const meanings = []; const meanings = [];
const seen = new Set();
// 尝试匹配常见的词典释义格式 // 有道词典释义结构:
const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi; // <li><span class="pos">n.</span> <span class="trans">释义</span></li>
// 模式1: 标准释义格式
const defPattern = /<li[^>]*>\s*<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>\s*<span[^>]*class="[^"]*trans[^"]*"[^>]*>([^<]+)<\/span>/gi;
let match; let match;
while ((match = posDefPattern.exec(html)) !== null) { while ((match = defPattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim(); const pos = this._stripHtml(match[1]).trim();
const definition = match[2].trim(); const def = this._stripHtml(match[2]).trim();
if (partOfSpeech && definition) {
meanings.push(createMeaning(partOfSpeech, [definition])); if (pos && def && !seen.has(`${pos}-${def}`)) {
seen.add(`${pos}-${def}`);
meanings.push(createMeaning(pos, [def]));
} }
} }
// 备选:直接匹配 "词性. 释义" 格式 // 模式2: 备选释义格式 (trans-container 中的释义)
if (meanings.length === 0) { if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi; const containerPattern = /<div[^>]*class="[^"]*trans-container[^"]*"[^>]*>(.*?)<\/ul>/i;
const seen = new Set(); const containerMatch = html.match(containerPattern);
while ((match = simplePattern.exec(html)) !== null) { if (containerMatch) {
const partOfSpeech = match[1].trim(); const container = containerMatch[1];
const definition = match[2].trim();
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue; // 提取所有 li 项
if (definition.length < 2 || definition.length > 30) continue; const liPattern = /<li[^>]*>(.*?)<\/li>/gi;
if (seen.has(definition)) continue; const liMatches = [...container.matchAll(liPattern)];
seen.add(definition); for (const liMatch of liMatches) {
meanings.push(createMeaning(partOfSpeech, [definition])); const content = liMatch[1];
if (meanings.length >= 5) break; // 提取词性
const posMatch = content.match(/<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>/i);
// 提取释义
const transMatch = content.match(/<span[^>]*class="[^"]*(?:trans|chn)[^"]*"[^>]*>([^<]+)<\/span>/i);
const pos = posMatch ? this._stripHtml(posMatch[1]).trim() : 'n.';
const def = transMatch ? this._stripHtml(transMatch[1]).trim() : this._stripHtml(content);
if (def && !seen.has(`${pos}-${def}`)) {
seen.add(`${pos}-${def}`);
meanings.push(createMeaning(pos, [def]));
}
}
} }
} }
@ -152,22 +187,33 @@ export class YoudaoDictionary extends DictionaryBase {
*/ */
_extractExamples(html) { _extractExamples(html) {
const examples = []; const examples = [];
// 匹配例句模式
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
let match;
const seen = new Set(); const seen = new Set();
while ((match = sentencePattern.exec(html)) !== null) { // 有道例句格式:
const sentence = match[1].trim(); // <li class="examples_li">
const translation = match[2].trim(); // <p class="examples_p">英文例句</p>
// <p class="examples_p">中文翻译</p>
// </li>
if (seen.has(sentence)) continue; // 提取例句块
seen.add(sentence); const senPattern = /<li[^>]*class="[^"]*examples_li[^"]*"[^>]*>(.*?)<\/li>/gi;
const senMatches = [...html.matchAll(senPattern)];
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) { for (const senMatch of senMatches) {
examples.push(createExample(sentence, translation)); const senBlock = senMatch[1];
// 提取所有 p 标签内容
const pPattern = /<p[^>]*class="[^"]*examples_p[^"]*"[^>]*>(.*?)<\/p>/gi;
const pMatches = [...senBlock.matchAll(pPattern)];
if (pMatches.length >= 2) {
const sentence = this._stripHtml(pMatches[0][1]).trim();
const translation = this._stripHtml(pMatches[1][1]).trim();
if (sentence && !sentence.includes('<') && !seen.has(sentence)) {
seen.add(sentence);
examples.push(createExample(sentence, translation));
}
} }
if (examples.length >= 2) break; if (examples.length >= 2) break;