fix(M3): 使用正则表达式替代 DOMParser,修复 Service Worker 环境限制

This commit is contained in:
李岩岩 2026-02-11 15:01:25 +08:00
parent 64290b9dd1
commit a83d5d7bb9
2 changed files with 129 additions and 209 deletions

View File

@ -32,7 +32,6 @@ export class BingDictionary extends DictionaryBase {
const url = `https://cn.bing.com/dict/search?q=${encodeURIComponent(trimmedWord)}`; const url = `https://cn.bing.com/dict/search?q=${encodeURIComponent(trimmedWord)}`;
try { try {
// 在 Background 中直接使用 fetch
const response = await fetch(url, { const response = await fetch(url, {
method: 'GET', method: 'GET',
headers: { headers: {
@ -47,12 +46,11 @@ export class BingDictionary extends DictionaryBase {
const html = await response.text(); const html = await response.text();
// 解析 HTML 提取数据 // 使用正则提取数据
return this._parseHtml(html, trimmedWord, url); return this._parseHtml(html, trimmedWord, url);
} catch (error) { } catch (error) {
console.error('[BingDictionary] Search failed:', error); console.error('[BingDictionary] Search failed:', error);
// 返回友好错误提示
return createResult({ return createResult({
word: trimmedWord, word: trimmedWord,
phonetic: '', phonetic: '',
@ -64,7 +62,7 @@ export class BingDictionary extends DictionaryBase {
} }
/** /**
* 解析必应词典 HTML * 解析必应词典 HTML使用正则
* @private * @private
* @param {string} html - HTML 内容 * @param {string} html - HTML 内容
* @param {string} word - 查询的单词 * @param {string} word - 查询的单词
@ -72,23 +70,11 @@ export class BingDictionary extends DictionaryBase {
* @returns {DictionaryResult} 解析结果 * @returns {DictionaryResult} 解析结果
*/ */
_parseHtml(html, word, url) { _parseHtml(html, word, url) {
const parser = new DOMParser();
const doc = parser.parseFromString(html, 'text/html');
// 提取音标
const phonetic = this._extractPhonetic(doc);
// 提取释义
const meanings = this._extractMeanings(doc);
// 提取例句
const examples = this._extractExamples(doc);
return createResult({ return createResult({
word, word,
phonetic, phonetic: this._extractPhonetic(html),
meanings, meanings: this._extractMeanings(html),
examples, examples: this._extractExamples(html),
url url
}); });
} }
@ -96,34 +82,21 @@ export class BingDictionary extends DictionaryBase {
/** /**
* 提取音标 * 提取音标
* @private * @private
* @param {Document} doc - HTML 文档 * @param {string} html - HTML 内容
* @returns {string} 音标 * @returns {string} 音标
*/ */
_extractPhonetic(doc) { _extractPhonetic(html) {
// 尝试多个可能的选择器 // 匹配音标格式如 [həˈləʊ] 或 /həˈləʊ/
const selectors = [ const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
'.hd_p1_1F_OWM', // 主要音标容器
'.hd_tf_lh', // 音标文本
'[class*="phonetic"]', // 包含 phonetic 的类
'.prons' // 发音区域
];
for (const selector of selectors) {
const elements = doc.querySelectorAll(selector);
for (const el of elements) {
const text = el.textContent?.trim();
if (text && text.includes('/')) {
return text;
}
}
}
// 正则提取 fallback
const bodyText = doc.body?.textContent || '';
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
if (match) { if (match) {
return match[0]; return match[0];
} }
// 备选:匹配 /.../ 格式
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
if (match2) {
return match2[0];
}
return ''; return '';
} }
@ -131,46 +104,44 @@ export class BingDictionary extends DictionaryBase {
/** /**
* 提取释义 * 提取释义
* @private * @private
* @param {Document} doc - HTML 文档 * @param {string} html - HTML 内容
* @returns {Array<Meaning>} 释义列表 * @returns {Array<Meaning>} 释义列表
*/ */
_extractMeanings(doc) { _extractMeanings(html) {
const meanings = []; const meanings = [];
// 尝试多个可能的选择器 // 尝试匹配常见的词典释义格式
const selectors = [ // 格式1: <span class="pos">n.</span><span class="def">定义</span>
'.qdef ul li', // 主要释义列表 const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi;
'.def li', // 备选释义
'[class*="meaning"] li', // 包含 meaning 的类 let match;
'.content ul li' // 通用内容列表 while ((match = posDefPattern.exec(html)) !== null) {
]; const partOfSpeech = match[1].trim();
const definition = match[2].trim();
for (const selector of selectors) { if (partOfSpeech && definition) {
const items = doc.querySelectorAll(selector); meanings.push(createMeaning(partOfSpeech, [definition]));
for (const item of items) {
const text = item.textContent?.trim();
if (!text) continue;
// 尝试匹配词性和释义
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
if (match) {
const partOfSpeech = match[1];
const defsText = match[2];
// 分割多个释义
const definitions = defsText
.split(/[;]/)
.map(d => d.trim())
.filter(d => d.length > 0);
if (definitions.length > 0) {
meanings.push(createMeaning(partOfSpeech, definitions));
}
}
} }
}
if (meanings.length > 0) break; // 格式2: 直接匹配 "词性. 释义" 格式
if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
const seen = new Set();
while ((match = simplePattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
// 过滤无效结果
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
if (definition.length < 2 || definition.length > 30) continue;
if (seen.has(definition)) continue;
seen.add(definition);
meanings.push(createMeaning(partOfSpeech, [definition]));
if (meanings.length >= 5) break;
}
} }
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])]; return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
@ -179,38 +150,32 @@ export class BingDictionary extends DictionaryBase {
/** /**
* 提取例句 * 提取例句
* @private * @private
* @param {Document} doc - HTML 文档 * @param {string} html - HTML 内容
* @returns {Array<Example>} 例句列表 * @returns {Array<Example>} 例句列表
*/ */
_extractExamples(doc) { _extractExamples(html) {
const examples = []; const examples = [];
// 尝试多个可能的选择器 // 匹配例句模式:英文句子后跟中文翻译
const selectors = [ // 尝试匹配 <li> 或 <div> 中的例句
'.sen_li', // 主要例句容器 const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
'.sentences li', // 备选例句
'[class*="example"] li', // 包含 example 的类 let match;
'.content .ex_li' // 通用例句 const seen = new Set();
];
while ((match = sentencePattern.exec(html)) !== null) {
for (const selector of selectors) { const sentence = match[1].trim();
const items = doc.querySelectorAll(selector); const translation = match[2].trim();
for (const item of items) { if (seen.has(sentence)) continue;
const enEl = item.querySelector('.sen_en, .en_sent, [class*="english"]'); seen.add(sentence);
const cnEl = item.querySelector('.sen_cn, .cn_sent, [class*="chinese"]');
// 验证:英文句子应该包含空格且长度合适
const sentence = enEl?.textContent?.trim() || item.textContent?.trim(); if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
const translation = cnEl?.textContent?.trim() || ''; examples.push(createExample(sentence, translation));
if (sentence) {
examples.push(createExample(sentence, translation));
}
if (examples.length >= 2) break;
} }
if (examples.length > 0) break; if (examples.length >= 2) break;
} }
return examples; return examples;

View File

@ -32,7 +32,6 @@ export class YoudaoDictionary extends DictionaryBase {
const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`; const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`;
try { try {
// 在 Background 中直接使用 fetch
const response = await fetch(url, { const response = await fetch(url, {
method: 'GET', method: 'GET',
headers: { headers: {
@ -47,12 +46,11 @@ export class YoudaoDictionary extends DictionaryBase {
const html = await response.text(); const html = await response.text();
// 解析 HTML 提取数据 // 使用正则提取数据
return this._parseHtml(html, trimmedWord, url); return this._parseHtml(html, trimmedWord, url);
} catch (error) { } catch (error) {
console.error('[YoudaoDictionary] Search failed:', error); console.error('[YoudaoDictionary] Search failed:', error);
// 返回友好错误提示
return createResult({ return createResult({
word: trimmedWord, word: trimmedWord,
phonetic: '', phonetic: '',
@ -64,7 +62,7 @@ export class YoudaoDictionary extends DictionaryBase {
} }
/** /**
* 解析有道词典 HTML * 解析有道词典 HTML使用正则
* @private * @private
* @param {string} html - HTML 内容 * @param {string} html - HTML 内容
* @param {string} word - 查询的单词 * @param {string} word - 查询的单词
@ -72,23 +70,11 @@ export class YoudaoDictionary extends DictionaryBase {
* @returns {DictionaryResult} 解析结果 * @returns {DictionaryResult} 解析结果
*/ */
_parseHtml(html, word, url) { _parseHtml(html, word, url) {
const parser = new DOMParser();
const doc = parser.parseFromString(html, 'text/html');
// 提取音标
const phonetic = this._extractPhonetic(doc);
// 提取释义
const meanings = this._extractMeanings(doc);
// 提取例句
const examples = this._extractExamples(doc);
return createResult({ return createResult({
word, word,
phonetic, phonetic: this._extractPhonetic(html),
meanings, meanings: this._extractMeanings(html),
examples, examples: this._extractExamples(html),
url url
}); });
} }
@ -96,34 +82,20 @@ export class YoudaoDictionary extends DictionaryBase {
/** /**
* 提取音标 * 提取音标
* @private * @private
* @param {Document} doc - HTML 文档 * @param {string} html - HTML 内容
* @returns {string} 音标 * @returns {string} 音标
*/ */
_extractPhonetic(doc) { _extractPhonetic(html) {
// 尝试多个可能的选择器 // 匹配音标格式
const selectors = [ const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
'.phonetic', // 主要音标类
'.pronounce', // 发音区域
'[class*="phonetic"]', // 包含 phonetic 的类
'.word-info .phonetic' // 单词信息区的音标
];
for (const selector of selectors) {
const elements = doc.querySelectorAll(selector);
for (const el of elements) {
const text = el.textContent?.trim();
if (text && (text.includes('/') || text.includes('['))) {
return text;
}
}
}
// 正则提取 fallback
const bodyText = doc.body?.textContent || '';
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
if (match) { if (match) {
return match[0]; return match[0];
} }
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
if (match2) {
return match2[0];
}
return ''; return '';
} }
@ -131,50 +103,42 @@ export class YoudaoDictionary extends DictionaryBase {
/** /**
* 提取释义 * 提取释义
* @private * @private
* @param {Document} doc - HTML 文档 * @param {string} html - HTML 内容
* @returns {Array<Meaning>} 释义列表 * @returns {Array<Meaning>} 释义列表
*/ */
_extractMeanings(doc) { _extractMeanings(html) {
const meanings = []; const meanings = [];
// 尝试多个可能的选择器 // 尝试匹配常见的词典释义格式
const selectors = [ const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi;
'.trans-container ul li', // 主要释义列表
'.basic .word-exp', // 基本释义 let match;
'.meaning li', // 备选释义 while ((match = posDefPattern.exec(html)) !== null) {
'[class*="meaning"] li', // 包含 meaning 的类 const partOfSpeech = match[1].trim();
'.content ul li' // 通用内容列表 const definition = match[2].trim();
]; if (partOfSpeech && definition) {
meanings.push(createMeaning(partOfSpeech, [definition]));
for (const selector of selectors) {
const items = doc.querySelectorAll(selector);
for (const item of items) {
const text = item.textContent?.trim();
if (!text) continue;
// 尝试匹配词性和释义
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
if (match) {
const partOfSpeech = match[1];
const defsText = match[2];
// 分割多个释义
const definitions = defsText
.split(/[;]/)
.map(d => d.trim())
.filter(d => d.length > 0);
if (definitions.length > 0) {
meanings.push(createMeaning(partOfSpeech, definitions));
}
} else if (text.length > 0 && text.length < 50) {
// 没有词性标记的释义
meanings.push(createMeaning('', [text]));
}
} }
}
if (meanings.length > 0) break; // 备选:直接匹配 "词性. 释义" 格式
if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
const seen = new Set();
while ((match = simplePattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
if (definition.length < 2 || definition.length > 30) continue;
if (seen.has(definition)) continue;
seen.add(definition);
meanings.push(createMeaning(partOfSpeech, [definition]));
if (meanings.length >= 5) break;
}
} }
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])]; return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
@ -183,39 +147,30 @@ export class YoudaoDictionary extends DictionaryBase {
/** /**
* 提取例句 * 提取例句
* @private * @private
* @param {Document} doc - HTML 文档 * @param {string} html - HTML 内容
* @returns {Array<Example>} 例句列表 * @returns {Array<Example>} 例句列表
*/ */
_extractExamples(doc) { _extractExamples(html) {
const examples = []; const examples = [];
// 尝试多个可能的选择器 // 匹配例句模式
const selectors = [ const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
'.examples li', // 主要例句列表
'.example-item', // 例句项 let match;
'.sentence', // 句子区域 const seen = new Set();
'[class*="example"] li', // 包含 example 的类
'.content .ex_li' // 通用例句 while ((match = sentencePattern.exec(html)) !== null) {
]; const sentence = match[1].trim();
const translation = match[2].trim();
for (const selector of selectors) {
const items = doc.querySelectorAll(selector);
for (const item of items) { if (seen.has(sentence)) continue;
const enEl = item.querySelector('.en-sentence, .english, [class*="english"]'); seen.add(sentence);
const cnEl = item.querySelector('.cn-sentence, .chinese, [class*="chinese"]');
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
const sentence = enEl?.textContent?.trim() || item.textContent?.trim(); examples.push(createExample(sentence, translation));
const translation = cnEl?.textContent?.trim() || '';
if (sentence) {
examples.push(createExample(sentence, translation));
}
if (examples.length >= 2) break;
} }
if (examples.length > 0) break; if (examples.length >= 2) break;
} }
return examples; return examples;