fix(M3): 改进 HTML 解析去除标签,优化面板样式
This commit is contained in:
parent
a83d5d7bb9
commit
8bb5b2592d
@ -55,7 +55,7 @@ export class DictPanel {
|
||||
|
||||
.header {
|
||||
padding: 12px 16px;
|
||||
background-color: #4CAF50;
|
||||
background-color: #5DBE8C;
|
||||
color: white;
|
||||
font-size: 16px;
|
||||
font-weight: 500;
|
||||
@ -65,7 +65,7 @@ export class DictPanel {
|
||||
}
|
||||
|
||||
.header:hover {
|
||||
background-color: #45a049;
|
||||
background-color: #4da87a;
|
||||
}
|
||||
|
||||
.content {
|
||||
@ -75,6 +75,7 @@ export class DictPanel {
|
||||
font-size: 14px;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
background: #f5f5f5;
|
||||
}
|
||||
|
||||
.placeholder {
|
||||
@ -83,11 +84,16 @@ export class DictPanel {
|
||||
margin-top: 100px;
|
||||
}
|
||||
|
||||
/* 单词标题 */
|
||||
.word-title {
|
||||
font-size: 28px;
|
||||
font-size: 32px;
|
||||
font-weight: bold;
|
||||
color: #333;
|
||||
margin-bottom: 4px;
|
||||
background: #fff;
|
||||
padding: 12px 16px;
|
||||
border-radius: 8px;
|
||||
margin: -16px -16px 16px -16px;
|
||||
}
|
||||
|
||||
.phonetic {
|
||||
@ -97,125 +103,46 @@ export class DictPanel {
|
||||
font-family: 'Times New Roman', serif;
|
||||
}
|
||||
|
||||
.meanings-section {
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
.section-title {
|
||||
font-size: 12px;
|
||||
font-weight: bold;
|
||||
color: #4CAF50;
|
||||
margin-bottom: 8px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.meaning-item {
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.part-of-speech {
|
||||
color: #2196F3;
|
||||
font-weight: 500;
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
.definition {
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.examples-section {
|
||||
margin-top: 16px;
|
||||
}
|
||||
|
||||
.example-item {
|
||||
/* 词典区块 */
|
||||
.dict-section {
|
||||
margin-bottom: 12px;
|
||||
padding: 8px;
|
||||
background: #f5f5f5;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.example-sentence {
|
||||
color: #333;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.example-translation {
|
||||
color: #666;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.loading {
|
||||
text-align: center;
|
||||
padding: 40px 20px;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.loading-spinner {
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
border: 3px solid #e0e0e0;
|
||||
border-top-color: #4CAF50;
|
||||
border-radius: 50%;
|
||||
animation: spin 0.8s linear infinite;
|
||||
margin: 0 auto 16px;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.loading-text {
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.dict-section {
|
||||
margin-bottom: 16px;
|
||||
padding-bottom: 16px;
|
||||
border-bottom: 1px solid #eee;
|
||||
}
|
||||
|
||||
.dict-section:last-child {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
.dict-section {
|
||||
margin-bottom: 8px;
|
||||
border: 1px solid #e0e0e0;
|
||||
border-radius: 6px;
|
||||
background: #fff;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.dict-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
padding: 10px 12px;
|
||||
background: #f8f8f8;
|
||||
background: #fff;
|
||||
cursor: pointer;
|
||||
user-select: none;
|
||||
transition: background 0.2s;
|
||||
border-bottom: 1px solid #f0f0f0;
|
||||
}
|
||||
|
||||
.dict-header:hover {
|
||||
background: #f0f0f0;
|
||||
background: #fafafa;
|
||||
}
|
||||
|
||||
.dict-toggle {
|
||||
font-size: 10px;
|
||||
margin-right: 6px;
|
||||
color: #666;
|
||||
margin-right: 8px;
|
||||
color: #999;
|
||||
width: 12px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.dict-icon {
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
width: 18px;
|
||||
height: 18px;
|
||||
border-radius: 3px;
|
||||
margin-right: 8px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
font-size: 10px;
|
||||
font-size: 11px;
|
||||
font-weight: bold;
|
||||
color: white;
|
||||
flex-shrink: 0;
|
||||
@ -234,19 +161,103 @@ export class DictPanel {
|
||||
}
|
||||
|
||||
.dict-name {
|
||||
font-size: 13px;
|
||||
font-size: 14px;
|
||||
font-weight: 600;
|
||||
color: #333;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.dict-content {
|
||||
padding: 12px;
|
||||
padding: 12px 16px;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.dict-content.collapsed {
|
||||
display: none;
|
||||
}
|
||||
|
||||
/* 释义 */
|
||||
.meanings-section {
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
|
||||
.section-title {
|
||||
font-size: 12px;
|
||||
font-weight: 600;
|
||||
color: #4CAF50;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.meaning-item {
|
||||
margin-bottom: 6px;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.part-of-speech {
|
||||
color: #2196F3;
|
||||
font-weight: 500;
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
.definition {
|
||||
color: #333;
|
||||
}
|
||||
|
||||
/* 例句 */
|
||||
.examples-section {
|
||||
margin-top: 12px;
|
||||
}
|
||||
|
||||
.example-item {
|
||||
margin-bottom: 10px;
|
||||
padding: 10px 12px;
|
||||
background: #f8f8f8;
|
||||
border-radius: 6px;
|
||||
}
|
||||
|
||||
.example-number {
|
||||
color: #999;
|
||||
margin-right: 6px;
|
||||
}
|
||||
|
||||
.example-sentence {
|
||||
color: #333;
|
||||
margin-bottom: 4px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.example-translation {
|
||||
color: #666;
|
||||
font-size: 13px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
/* 加载状态 */
|
||||
.loading {
|
||||
text-align: center;
|
||||
padding: 40px 20px;
|
||||
color: #666;
|
||||
background: #fff;
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.loading-spinner {
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
border: 3px solid #e0e0e0;
|
||||
border-top-color: #5DBE8C;
|
||||
border-radius: 50%;
|
||||
animation: spin 0.8s linear infinite;
|
||||
margin: 0 auto 16px;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.loading-text {
|
||||
font-size: 14px;
|
||||
}
|
||||
</style>
|
||||
<div class="panel">
|
||||
<div class="header">词典结果</div>
|
||||
@ -614,9 +625,12 @@ export class DictPanel {
|
||||
// 最多显示2个例句
|
||||
const displayExamples = examples.slice(0, 2);
|
||||
|
||||
const examplesHtml = displayExamples.map(e => `
|
||||
const examplesHtml = displayExamples.map((e, index) => `
|
||||
<div class="example-item">
|
||||
<div class="example-sentence">${this._escapeHtml(e.sentence || '')}</div>
|
||||
<div class="example-sentence">
|
||||
<span class="example-number">${index + 1}.</span>
|
||||
${this._escapeHtml(e.sentence || '')}
|
||||
</div>
|
||||
${e.translation ? `<div class="example-translation">${this._escapeHtml(e.translation)}</div>` : ''}
|
||||
</div>
|
||||
`).join('');
|
||||
|
||||
@ -46,7 +46,6 @@ export class BingDictionary extends DictionaryBase {
|
||||
|
||||
const html = await response.text();
|
||||
|
||||
// 使用正则提取数据
|
||||
return this._parseHtml(html, trimmedWord, url);
|
||||
} catch (error) {
|
||||
console.error('[BingDictionary] Search failed:', error);
|
||||
@ -62,7 +61,17 @@ export class BingDictionary extends DictionaryBase {
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析必应词典 HTML(使用正则)
|
||||
* 去除 HTML 标签
|
||||
* @private
|
||||
* @param {string} html - 包含 HTML 的字符串
|
||||
* @returns {string} 纯文本
|
||||
*/
|
||||
_stripHtml(html) {
|
||||
return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析必应词典 HTML
|
||||
* @private
|
||||
* @param {string} html - HTML 内容
|
||||
* @param {string} word - 查询的单词
|
||||
@ -86,16 +95,24 @@ export class BingDictionary extends DictionaryBase {
|
||||
* @returns {string} 音标
|
||||
*/
|
||||
_extractPhonetic(html) {
|
||||
// 匹配音标格式如 [həˈləʊ] 或 /həˈləʊ/
|
||||
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
|
||||
if (match) {
|
||||
return match[0];
|
||||
// 必应词典音标通常在 hd_pr 或 hd_tf 类中
|
||||
const patterns = [
|
||||
/<span[^>]*class="[^"]*hd_pr[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
|
||||
/<span[^>]*class="[^"]*hd_tf[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
|
||||
/<span[^>]*class="[^"]*pron[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i
|
||||
];
|
||||
|
||||
for (const pattern of patterns) {
|
||||
const match = html.match(pattern);
|
||||
if (match) {
|
||||
return match[1].trim();
|
||||
}
|
||||
}
|
||||
|
||||
// 备选:匹配 /.../ 格式
|
||||
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
|
||||
if (match2) {
|
||||
return match2[0];
|
||||
|
||||
// 通用音标匹配
|
||||
const genericMatch = html.match(/(\[[\u0250-\u02AEˈˌa-zA-Z\s]+\])/);
|
||||
if (genericMatch) {
|
||||
return genericMatch[1];
|
||||
}
|
||||
|
||||
return '';
|
||||
@ -109,38 +126,47 @@ export class BingDictionary extends DictionaryBase {
|
||||
*/
|
||||
_extractMeanings(html) {
|
||||
const meanings = [];
|
||||
const seen = new Set();
|
||||
|
||||
// 尝试匹配常见的词典释义格式
|
||||
// 格式1: <span class="pos">n.</span><span class="def">定义</span>
|
||||
const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi;
|
||||
// 必应词典释义结构:
|
||||
// <li><span class="pos">n.</span> <span class="def">释义</span></li>
|
||||
// 或者:
|
||||
// <div class="df_div">词性. 释义</div>
|
||||
|
||||
// 模式1: 标准词典释义格式
|
||||
const defPattern = /<li[^>]*>\s*<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>\s*<span[^>]*class="[^"]*def[^"]*"[^>]*>([^<]+)<\/span>/gi;
|
||||
|
||||
let match;
|
||||
while ((match = posDefPattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
if (partOfSpeech && definition) {
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
while ((match = defPattern.exec(html)) !== null) {
|
||||
const pos = this._stripHtml(match[1]).trim();
|
||||
const def = this._stripHtml(match[2]).trim();
|
||||
|
||||
if (pos && def && !seen.has(`${pos}-${def}`)) {
|
||||
seen.add(`${pos}-${def}`);
|
||||
meanings.push(createMeaning(pos, [def]));
|
||||
}
|
||||
}
|
||||
|
||||
// 格式2: 直接匹配 "词性. 释义" 格式
|
||||
// 模式2: 备选释义格式
|
||||
if (meanings.length === 0) {
|
||||
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
|
||||
const seen = new Set();
|
||||
const altPattern = /<div[^>]*class="[^"]*(?:qdef|def|meaning)[^"]*"[^>]*>(.*?)<\/div>/gi;
|
||||
|
||||
while ((match = simplePattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
while ((match = altPattern.exec(html)) !== null) {
|
||||
const content = match[1];
|
||||
|
||||
// 过滤无效结果
|
||||
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
|
||||
if (definition.length < 2 || definition.length > 30) continue;
|
||||
if (seen.has(definition)) continue;
|
||||
// 提取词性和释义
|
||||
const posMatches = content.matchAll(/<span[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/span>/gi);
|
||||
const defMatches = content.matchAll(/<span[^>]*class="[^"]*def[^"]*"[^>]*>([^<]+)<\/span>/gi);
|
||||
|
||||
seen.add(definition);
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
const poses = [...posMatches].map(m => this._stripHtml(m[1]).trim()).filter(Boolean);
|
||||
const defs = [...defMatches].map(m => this._stripHtml(m[1]).trim()).filter(Boolean);
|
||||
|
||||
if (meanings.length >= 5) break;
|
||||
for (let i = 0; i < Math.min(poses.length, defs.length); i++) {
|
||||
if (!seen.has(`${poses[i]}-${defs[i]}`)) {
|
||||
seen.add(`${poses[i]}-${defs[i]}`);
|
||||
meanings.push(createMeaning(poses[i], [defs[i]]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -155,24 +181,35 @@ export class BingDictionary extends DictionaryBase {
|
||||
*/
|
||||
_extractExamples(html) {
|
||||
const examples = [];
|
||||
|
||||
// 匹配例句模式:英文句子后跟中文翻译
|
||||
// 尝试匹配 <li> 或 <div> 中的例句
|
||||
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
|
||||
|
||||
let match;
|
||||
const seen = new Set();
|
||||
|
||||
while ((match = sentencePattern.exec(html)) !== null) {
|
||||
const sentence = match[1].trim();
|
||||
const translation = match[2].trim();
|
||||
// 必应例句格式:
|
||||
// <div class="sen_li">
|
||||
// <div class="sen_en">英文例句</div>
|
||||
// <div class="sen_cn">中文翻译</div>
|
||||
// </div>
|
||||
|
||||
// 提取所有例句块
|
||||
const senPattern = /<div[^>]*class="[^"]*sen_li[^"]*"[^>]*>(.*?)<\/div>/gi;
|
||||
const senMatches = [...html.matchAll(senPattern)];
|
||||
|
||||
for (const senMatch of senMatches) {
|
||||
const senBlock = senMatch[1];
|
||||
|
||||
if (seen.has(sentence)) continue;
|
||||
seen.add(sentence);
|
||||
// 提取英文
|
||||
const enMatch = senBlock.match(/<div[^>]*class="[^"]*sen_en[^"]*"[^>]*>(.*?)<\/div>/i);
|
||||
// 提取中文
|
||||
const cnMatch = senBlock.match(/<div[^>]*class="[^"]*sen_cn[^"]*"[^>]*>(.*?)<\/div>/i);
|
||||
|
||||
// 验证:英文句子应该包含空格且长度合适
|
||||
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
|
||||
examples.push(createExample(sentence, translation));
|
||||
if (enMatch) {
|
||||
const sentence = this._stripHtml(enMatch[1]).trim();
|
||||
const translation = cnMatch ? this._stripHtml(cnMatch[1]).trim() : '';
|
||||
|
||||
// 过滤无效结果
|
||||
if (sentence && sentence.length > 5 && !sentence.includes('<') && !seen.has(sentence)) {
|
||||
seen.add(sentence);
|
||||
examples.push(createExample(sentence, translation));
|
||||
}
|
||||
}
|
||||
|
||||
if (examples.length >= 2) break;
|
||||
|
||||
@ -46,7 +46,6 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
|
||||
const html = await response.text();
|
||||
|
||||
// 使用正则提取数据
|
||||
return this._parseHtml(html, trimmedWord, url);
|
||||
} catch (error) {
|
||||
console.error('[YoudaoDictionary] Search failed:', error);
|
||||
@ -62,7 +61,17 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析有道词典 HTML(使用正则)
|
||||
* 去除 HTML 标签
|
||||
* @private
|
||||
* @param {string} html - 包含 HTML 的字符串
|
||||
* @returns {string} 纯文本
|
||||
*/
|
||||
_stripHtml(html) {
|
||||
return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析有道词典 HTML
|
||||
* @private
|
||||
* @param {string} html - HTML 内容
|
||||
* @param {string} word - 查询的单词
|
||||
@ -86,15 +95,24 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
* @returns {string} 音标
|
||||
*/
|
||||
_extractPhonetic(html) {
|
||||
// 匹配音标格式
|
||||
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
|
||||
if (match) {
|
||||
return match[0];
|
||||
// 有道词典音标通常在 phonetic 或 pron 类中
|
||||
const patterns = [
|
||||
/<span[^>]*class="[^"]*phonetic[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
|
||||
/<span[^>]*class="[^"]*pron[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
|
||||
/<span[^>]*class="[^"]*phone[^"]*"[^>]*>([^<]+)<\/span>/i
|
||||
];
|
||||
|
||||
for (const pattern of patterns) {
|
||||
const match = html.match(pattern);
|
||||
if (match) {
|
||||
return match[1].trim();
|
||||
}
|
||||
}
|
||||
|
||||
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
|
||||
if (match2) {
|
||||
return match2[0];
|
||||
|
||||
// 通用音标匹配
|
||||
const genericMatch = html.match(/(\[[\u0250-\u02AEˈˌa-zA-Z\s]+\])/);
|
||||
if (genericMatch) {
|
||||
return genericMatch[1];
|
||||
}
|
||||
|
||||
return '';
|
||||
@ -108,36 +126,53 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
*/
|
||||
_extractMeanings(html) {
|
||||
const meanings = [];
|
||||
const seen = new Set();
|
||||
|
||||
// 尝试匹配常见的词典释义格式
|
||||
const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi;
|
||||
// 有道词典释义结构:
|
||||
// <li><span class="pos">n.</span> <span class="trans">释义</span></li>
|
||||
|
||||
// 模式1: 标准释义格式
|
||||
const defPattern = /<li[^>]*>\s*<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>\s*<span[^>]*class="[^"]*trans[^"]*"[^>]*>([^<]+)<\/span>/gi;
|
||||
|
||||
let match;
|
||||
while ((match = posDefPattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
if (partOfSpeech && definition) {
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
while ((match = defPattern.exec(html)) !== null) {
|
||||
const pos = this._stripHtml(match[1]).trim();
|
||||
const def = this._stripHtml(match[2]).trim();
|
||||
|
||||
if (pos && def && !seen.has(`${pos}-${def}`)) {
|
||||
seen.add(`${pos}-${def}`);
|
||||
meanings.push(createMeaning(pos, [def]));
|
||||
}
|
||||
}
|
||||
|
||||
// 备选:直接匹配 "词性. 释义" 格式
|
||||
// 模式2: 备选释义格式 (trans-container 中的释义)
|
||||
if (meanings.length === 0) {
|
||||
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
|
||||
const seen = new Set();
|
||||
const containerPattern = /<div[^>]*class="[^"]*trans-container[^"]*"[^>]*>(.*?)<\/ul>/i;
|
||||
const containerMatch = html.match(containerPattern);
|
||||
|
||||
while ((match = simplePattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
if (containerMatch) {
|
||||
const container = containerMatch[1];
|
||||
|
||||
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
|
||||
if (definition.length < 2 || definition.length > 30) continue;
|
||||
if (seen.has(definition)) continue;
|
||||
// 提取所有 li 项
|
||||
const liPattern = /<li[^>]*>(.*?)<\/li>/gi;
|
||||
const liMatches = [...container.matchAll(liPattern)];
|
||||
|
||||
seen.add(definition);
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
|
||||
if (meanings.length >= 5) break;
|
||||
for (const liMatch of liMatches) {
|
||||
const content = liMatch[1];
|
||||
|
||||
// 提取词性
|
||||
const posMatch = content.match(/<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>/i);
|
||||
// 提取释义
|
||||
const transMatch = content.match(/<span[^>]*class="[^"]*(?:trans|chn)[^"]*"[^>]*>([^<]+)<\/span>/i);
|
||||
|
||||
const pos = posMatch ? this._stripHtml(posMatch[1]).trim() : 'n.';
|
||||
const def = transMatch ? this._stripHtml(transMatch[1]).trim() : this._stripHtml(content);
|
||||
|
||||
if (def && !seen.has(`${pos}-${def}`)) {
|
||||
seen.add(`${pos}-${def}`);
|
||||
meanings.push(createMeaning(pos, [def]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,22 +187,33 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
*/
|
||||
_extractExamples(html) {
|
||||
const examples = [];
|
||||
|
||||
// 匹配例句模式
|
||||
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
|
||||
|
||||
let match;
|
||||
const seen = new Set();
|
||||
|
||||
while ((match = sentencePattern.exec(html)) !== null) {
|
||||
const sentence = match[1].trim();
|
||||
const translation = match[2].trim();
|
||||
// 有道例句格式:
|
||||
// <li class="examples_li">
|
||||
// <p class="examples_p">英文例句</p>
|
||||
// <p class="examples_p">中文翻译</p>
|
||||
// </li>
|
||||
|
||||
// 提取例句块
|
||||
const senPattern = /<li[^>]*class="[^"]*examples_li[^"]*"[^>]*>(.*?)<\/li>/gi;
|
||||
const senMatches = [...html.matchAll(senPattern)];
|
||||
|
||||
for (const senMatch of senMatches) {
|
||||
const senBlock = senMatch[1];
|
||||
|
||||
if (seen.has(sentence)) continue;
|
||||
seen.add(sentence);
|
||||
// 提取所有 p 标签内容
|
||||
const pPattern = /<p[^>]*class="[^"]*examples_p[^"]*"[^>]*>(.*?)<\/p>/gi;
|
||||
const pMatches = [...senBlock.matchAll(pPattern)];
|
||||
|
||||
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
|
||||
examples.push(createExample(sentence, translation));
|
||||
if (pMatches.length >= 2) {
|
||||
const sentence = this._stripHtml(pMatches[0][1]).trim();
|
||||
const translation = this._stripHtml(pMatches[1][1]).trim();
|
||||
|
||||
if (sentence && !sentence.includes('<') && !seen.has(sentence)) {
|
||||
seen.add(sentence);
|
||||
examples.push(createExample(sentence, translation));
|
||||
}
|
||||
}
|
||||
|
||||
if (examples.length >= 2) break;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user