fix(M3): 使用正则表达式替代 DOMParser,修复 Service Worker 环境限制

This commit is contained in:
李岩岩 2026-02-11 15:01:25 +08:00
parent 64290b9dd1
commit a83d5d7bb9
2 changed files with 129 additions and 209 deletions

View File

@ -32,7 +32,6 @@ export class BingDictionary extends DictionaryBase {
const url = `https://cn.bing.com/dict/search?q=${encodeURIComponent(trimmedWord)}`;
try {
// 在 Background 中直接使用 fetch
const response = await fetch(url, {
method: 'GET',
headers: {
@ -47,12 +46,11 @@ export class BingDictionary extends DictionaryBase {
const html = await response.text();
// 解析 HTML 提取数据
// 使用正则提取数据
return this._parseHtml(html, trimmedWord, url);
} catch (error) {
console.error('[BingDictionary] Search failed:', error);
// 返回友好错误提示
return createResult({
word: trimmedWord,
phonetic: '',
@ -64,7 +62,7 @@ export class BingDictionary extends DictionaryBase {
}
/**
* 解析必应词典 HTML
* 解析必应词典 HTML(使用正则)
* @private
* @param {string} html - HTML 内容
* @param {string} word - 查询的单词
@ -72,23 +70,11 @@ export class BingDictionary extends DictionaryBase {
* @returns {DictionaryResult} 解析结果
*/
_parseHtml(html, word, url) {
const parser = new DOMParser();
const doc = parser.parseFromString(html, 'text/html');
// 提取音标
const phonetic = this._extractPhonetic(doc);
// 提取释义
const meanings = this._extractMeanings(doc);
// 提取例句
const examples = this._extractExamples(doc);
return createResult({
word,
phonetic,
meanings,
examples,
phonetic: this._extractPhonetic(html),
meanings: this._extractMeanings(html),
examples: this._extractExamples(html),
url
});
}
@ -96,34 +82,21 @@ export class BingDictionary extends DictionaryBase {
/**
* 提取音标
* @private
* @param {Document} doc - HTML 文档
* @param {string} html - HTML 内容
* @returns {string} 音标
*/
_extractPhonetic(doc) {
// 尝试多个可能的选择器
const selectors = [
'.hd_p1_1F_OWM', // 主要音标容器
'.hd_tf_lh', // 音标文本
'[class*="phonetic"]', // 包含 phonetic 的类
'.prons' // 发音区域
];
for (const selector of selectors) {
const elements = doc.querySelectorAll(selector);
for (const el of elements) {
const text = el.textContent?.trim();
if (text && text.includes('/')) {
return text;
}
}
}
// 正则提取 fallback
const bodyText = doc.body?.textContent || '';
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
_extractPhonetic(html) {
// 匹配音标格式,如 [həˈləʊ] 或 /həˈləʊ/
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
if (match) {
return match[0];
}
// 备选:匹配 /.../ 格式
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
if (match2) {
return match2[0];
}
return '';
}
@ -131,46 +104,44 @@ export class BingDictionary extends DictionaryBase {
/**
* 提取释义
* @private
* @param {Document} doc - HTML 文档
* @param {string} html - HTML 内容
* @returns {Array<Meaning>} 释义列表
*/
_extractMeanings(doc) {
_extractMeanings(html) {
const meanings = [];
// 尝试多个可能的选择器
const selectors = [
'.qdef ul li', // 主要释义列表
'.def li', // 备选释义
'[class*="meaning"] li', // 包含 meaning 的类
'.content ul li' // 通用内容列表
];
for (const selector of selectors) {
const items = doc.querySelectorAll(selector);
for (const item of items) {
const text = item.textContent?.trim();
if (!text) continue;
// 尝试匹配词性和释义
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
if (match) {
const partOfSpeech = match[1];
const defsText = match[2];
// 分割多个释义
const definitions = defsText
.split(/[;]/)
.map(d => d.trim())
.filter(d => d.length > 0);
if (definitions.length > 0) {
meanings.push(createMeaning(partOfSpeech, definitions));
}
}
// 尝试匹配常见的词典释义格式
// 格式1: <span class="pos">n.</span><span class="def">定义</span>
const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi;
let match;
while ((match = posDefPattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
if (partOfSpeech && definition) {
meanings.push(createMeaning(partOfSpeech, [definition]));
}
}
if (meanings.length > 0) break;
// 格式2: 直接匹配 "词性. 释义" 格式
if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
const seen = new Set();
while ((match = simplePattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
// 过滤无效结果
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
if (definition.length < 2 || definition.length > 30) continue;
if (seen.has(definition)) continue;
seen.add(definition);
meanings.push(createMeaning(partOfSpeech, [definition]));
if (meanings.length >= 5) break;
}
}
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
@ -179,38 +150,32 @@ export class BingDictionary extends DictionaryBase {
/**
* 提取例句
* @private
* @param {Document} doc - HTML 文档
* @param {string} html - HTML 内容
* @returns {Array<Example>} 例句列表
*/
_extractExamples(doc) {
_extractExamples(html) {
const examples = [];
// 尝试多个可能的选择器
const selectors = [
'.sen_li', // 主要例句容器
'.sentences li', // 备选例句
'[class*="example"] li', // 包含 example 的类
'.content .ex_li' // 通用例句
];
for (const selector of selectors) {
const items = doc.querySelectorAll(selector);
// 匹配例句模式:英文句子后跟中文翻译
// 尝试匹配 <li> 或 <div> 中的例句
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
let match;
const seen = new Set();
while ((match = sentencePattern.exec(html)) !== null) {
const sentence = match[1].trim();
const translation = match[2].trim();
for (const item of items) {
const enEl = item.querySelector('.sen_en, .en_sent, [class*="english"]');
const cnEl = item.querySelector('.sen_cn, .cn_sent, [class*="chinese"]');
const sentence = enEl?.textContent?.trim() || item.textContent?.trim();
const translation = cnEl?.textContent?.trim() || '';
if (sentence) {
examples.push(createExample(sentence, translation));
}
if (examples.length >= 2) break;
if (seen.has(sentence)) continue;
seen.add(sentence);
// 验证:英文句子应该包含空格且长度合适
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
examples.push(createExample(sentence, translation));
}
if (examples.length > 0) break;
if (examples.length >= 2) break;
}
return examples;

View File

@ -32,7 +32,6 @@ export class YoudaoDictionary extends DictionaryBase {
const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`;
try {
// 在 Background 中直接使用 fetch
const response = await fetch(url, {
method: 'GET',
headers: {
@ -47,12 +46,11 @@ export class YoudaoDictionary extends DictionaryBase {
const html = await response.text();
// 解析 HTML 提取数据
// 使用正则提取数据
return this._parseHtml(html, trimmedWord, url);
} catch (error) {
console.error('[YoudaoDictionary] Search failed:', error);
// 返回友好错误提示
return createResult({
word: trimmedWord,
phonetic: '',
@ -64,7 +62,7 @@ export class YoudaoDictionary extends DictionaryBase {
}
/**
* 解析有道词典 HTML
* 解析有道词典 HTML(使用正则)
* @private
* @param {string} html - HTML 内容
* @param {string} word - 查询的单词
@ -72,23 +70,11 @@ export class YoudaoDictionary extends DictionaryBase {
* @returns {DictionaryResult} 解析结果
*/
_parseHtml(html, word, url) {
const parser = new DOMParser();
const doc = parser.parseFromString(html, 'text/html');
// 提取音标
const phonetic = this._extractPhonetic(doc);
// 提取释义
const meanings = this._extractMeanings(doc);
// 提取例句
const examples = this._extractExamples(doc);
return createResult({
word,
phonetic,
meanings,
examples,
phonetic: this._extractPhonetic(html),
meanings: this._extractMeanings(html),
examples: this._extractExamples(html),
url
});
}
@ -96,34 +82,20 @@ export class YoudaoDictionary extends DictionaryBase {
/**
* 提取音标
* @private
* @param {Document} doc - HTML 文档
* @param {string} html - HTML 内容
* @returns {string} 音标
*/
_extractPhonetic(doc) {
// 尝试多个可能的选择器
const selectors = [
'.phonetic', // 主要音标类
'.pronounce', // 发音区域
'[class*="phonetic"]', // 包含 phonetic 的类
'.word-info .phonetic' // 单词信息区的音标
];
for (const selector of selectors) {
const elements = doc.querySelectorAll(selector);
for (const el of elements) {
const text = el.textContent?.trim();
if (text && (text.includes('/') || text.includes('['))) {
return text;
}
}
}
// 正则提取 fallback
const bodyText = doc.body?.textContent || '';
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
_extractPhonetic(html) {
// 匹配音标格式
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
if (match) {
return match[0];
}
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
if (match2) {
return match2[0];
}
return '';
}
@ -131,50 +103,42 @@ export class YoudaoDictionary extends DictionaryBase {
/**
* 提取释义
* @private
* @param {Document} doc - HTML 文档
* @param {string} html - HTML 内容
* @returns {Array<Meaning>} 释义列表
*/
_extractMeanings(doc) {
_extractMeanings(html) {
const meanings = [];
// 尝试多个可能的选择器
const selectors = [
'.trans-container ul li', // 主要释义列表
'.basic .word-exp', // 基本释义
'.meaning li', // 备选释义
'[class*="meaning"] li', // 包含 meaning 的类
'.content ul li' // 通用内容列表
];
for (const selector of selectors) {
const items = doc.querySelectorAll(selector);
for (const item of items) {
const text = item.textContent?.trim();
if (!text) continue;
// 尝试匹配词性和释义
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
if (match) {
const partOfSpeech = match[1];
const defsText = match[2];
// 分割多个释义
const definitions = defsText
.split(/[;]/)
.map(d => d.trim())
.filter(d => d.length > 0);
if (definitions.length > 0) {
meanings.push(createMeaning(partOfSpeech, definitions));
}
} else if (text.length > 0 && text.length < 50) {
// 没有词性标记的释义
meanings.push(createMeaning('', [text]));
}
// 尝试匹配常见的词典释义格式
const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi;
let match;
while ((match = posDefPattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
if (partOfSpeech && definition) {
meanings.push(createMeaning(partOfSpeech, [definition]));
}
}
if (meanings.length > 0) break;
// 备选:直接匹配 "词性. 释义" 格式
if (meanings.length === 0) {
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
const seen = new Set();
while ((match = simplePattern.exec(html)) !== null) {
const partOfSpeech = match[1].trim();
const definition = match[2].trim();
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
if (definition.length < 2 || definition.length > 30) continue;
if (seen.has(definition)) continue;
seen.add(definition);
meanings.push(createMeaning(partOfSpeech, [definition]));
if (meanings.length >= 5) break;
}
}
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
@ -183,39 +147,30 @@ export class YoudaoDictionary extends DictionaryBase {
/**
* 提取例句
* @private
* @param {Document} doc - HTML 文档
* @param {string} html - HTML 内容
* @returns {Array<Example>} 例句列表
*/
_extractExamples(doc) {
_extractExamples(html) {
const examples = [];
// 尝试多个可能的选择器
const selectors = [
'.examples li', // 主要例句列表
'.example-item', // 例句项
'.sentence', // 句子区域
'[class*="example"] li', // 包含 example 的类
'.content .ex_li' // 通用例句
];
for (const selector of selectors) {
const items = doc.querySelectorAll(selector);
// 匹配例句模式
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
let match;
const seen = new Set();
while ((match = sentencePattern.exec(html)) !== null) {
const sentence = match[1].trim();
const translation = match[2].trim();
for (const item of items) {
const enEl = item.querySelector('.en-sentence, .english, [class*="english"]');
const cnEl = item.querySelector('.cn-sentence, .chinese, [class*="chinese"]');
const sentence = enEl?.textContent?.trim() || item.textContent?.trim();
const translation = cnEl?.textContent?.trim() || '';
if (sentence) {
examples.push(createExample(sentence, translation));
}
if (examples.length >= 2) break;
if (seen.has(sentence)) continue;
seen.add(sentence);
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
examples.push(createExample(sentence, translation));
}
if (examples.length > 0) break;
if (examples.length >= 2) break;
}
return examples;