fix(M3): 使用正则表达式替代 DOMParser,修复 Service Worker 环境限制
This commit is contained in:
parent
64290b9dd1
commit
a83d5d7bb9
@ -32,7 +32,6 @@ export class BingDictionary extends DictionaryBase {
|
||||
const url = `https://cn.bing.com/dict/search?q=${encodeURIComponent(trimmedWord)}`;
|
||||
|
||||
try {
|
||||
// 在 Background 中直接使用 fetch
|
||||
const response = await fetch(url, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
@ -47,12 +46,11 @@ export class BingDictionary extends DictionaryBase {
|
||||
|
||||
const html = await response.text();
|
||||
|
||||
// 解析 HTML 提取数据
|
||||
// 使用正则提取数据
|
||||
return this._parseHtml(html, trimmedWord, url);
|
||||
} catch (error) {
|
||||
console.error('[BingDictionary] Search failed:', error);
|
||||
|
||||
// 返回友好错误提示
|
||||
return createResult({
|
||||
word: trimmedWord,
|
||||
phonetic: '',
|
||||
@ -64,7 +62,7 @@ export class BingDictionary extends DictionaryBase {
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析必应词典 HTML
|
||||
* 解析必应词典 HTML(使用正则)
|
||||
* @private
|
||||
* @param {string} html - HTML 内容
|
||||
* @param {string} word - 查询的单词
|
||||
@ -72,23 +70,11 @@ export class BingDictionary extends DictionaryBase {
|
||||
* @returns {DictionaryResult} 解析结果
|
||||
*/
|
||||
_parseHtml(html, word, url) {
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString(html, 'text/html');
|
||||
|
||||
// 提取音标
|
||||
const phonetic = this._extractPhonetic(doc);
|
||||
|
||||
// 提取释义
|
||||
const meanings = this._extractMeanings(doc);
|
||||
|
||||
// 提取例句
|
||||
const examples = this._extractExamples(doc);
|
||||
|
||||
return createResult({
|
||||
word,
|
||||
phonetic,
|
||||
meanings,
|
||||
examples,
|
||||
phonetic: this._extractPhonetic(html),
|
||||
meanings: this._extractMeanings(html),
|
||||
examples: this._extractExamples(html),
|
||||
url
|
||||
});
|
||||
}
|
||||
@ -96,34 +82,21 @@ export class BingDictionary extends DictionaryBase {
|
||||
/**
|
||||
* 提取音标
|
||||
* @private
|
||||
* @param {Document} doc - HTML 文档
|
||||
* @param {string} html - HTML 内容
|
||||
* @returns {string} 音标
|
||||
*/
|
||||
_extractPhonetic(doc) {
|
||||
// 尝试多个可能的选择器
|
||||
const selectors = [
|
||||
'.hd_p1_1F_OWM', // 主要音标容器
|
||||
'.hd_tf_lh', // 音标文本
|
||||
'[class*="phonetic"]', // 包含 phonetic 的类
|
||||
'.prons' // 发音区域
|
||||
];
|
||||
|
||||
for (const selector of selectors) {
|
||||
const elements = doc.querySelectorAll(selector);
|
||||
for (const el of elements) {
|
||||
const text = el.textContent?.trim();
|
||||
if (text && text.includes('/')) {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 正则提取 fallback
|
||||
const bodyText = doc.body?.textContent || '';
|
||||
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
|
||||
_extractPhonetic(html) {
|
||||
// 匹配音标格式如 [həˈləʊ] 或 /həˈləʊ/
|
||||
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
|
||||
if (match) {
|
||||
return match[0];
|
||||
}
|
||||
|
||||
// 备选:匹配 /.../ 格式
|
||||
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
|
||||
if (match2) {
|
||||
return match2[0];
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
@ -131,46 +104,44 @@ export class BingDictionary extends DictionaryBase {
|
||||
/**
|
||||
* 提取释义
|
||||
* @private
|
||||
* @param {Document} doc - HTML 文档
|
||||
* @param {string} html - HTML 内容
|
||||
* @returns {Array<Meaning>} 释义列表
|
||||
*/
|
||||
_extractMeanings(doc) {
|
||||
_extractMeanings(html) {
|
||||
const meanings = [];
|
||||
|
||||
// 尝试多个可能的选择器
|
||||
const selectors = [
|
||||
'.qdef ul li', // 主要释义列表
|
||||
'.def li', // 备选释义
|
||||
'[class*="meaning"] li', // 包含 meaning 的类
|
||||
'.content ul li' // 通用内容列表
|
||||
];
|
||||
|
||||
for (const selector of selectors) {
|
||||
const items = doc.querySelectorAll(selector);
|
||||
|
||||
for (const item of items) {
|
||||
const text = item.textContent?.trim();
|
||||
if (!text) continue;
|
||||
|
||||
// 尝试匹配词性和释义
|
||||
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
|
||||
if (match) {
|
||||
const partOfSpeech = match[1];
|
||||
const defsText = match[2];
|
||||
|
||||
// 分割多个释义
|
||||
const definitions = defsText
|
||||
.split(/[;;]/)
|
||||
.map(d => d.trim())
|
||||
.filter(d => d.length > 0);
|
||||
|
||||
if (definitions.length > 0) {
|
||||
meanings.push(createMeaning(partOfSpeech, definitions));
|
||||
}
|
||||
}
|
||||
// 尝试匹配常见的词典释义格式
|
||||
// 格式1: <span class="pos">n.</span><span class="def">定义</span>
|
||||
const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi;
|
||||
|
||||
let match;
|
||||
while ((match = posDefPattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
if (partOfSpeech && definition) {
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
}
|
||||
}
|
||||
|
||||
if (meanings.length > 0) break;
|
||||
// 格式2: 直接匹配 "词性. 释义" 格式
|
||||
if (meanings.length === 0) {
|
||||
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
|
||||
const seen = new Set();
|
||||
|
||||
while ((match = simplePattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
|
||||
// 过滤无效结果
|
||||
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
|
||||
if (definition.length < 2 || definition.length > 30) continue;
|
||||
if (seen.has(definition)) continue;
|
||||
|
||||
seen.add(definition);
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
|
||||
if (meanings.length >= 5) break;
|
||||
}
|
||||
}
|
||||
|
||||
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
|
||||
@ -179,38 +150,32 @@ export class BingDictionary extends DictionaryBase {
|
||||
/**
|
||||
* 提取例句
|
||||
* @private
|
||||
* @param {Document} doc - HTML 文档
|
||||
* @param {string} html - HTML 内容
|
||||
* @returns {Array<Example>} 例句列表
|
||||
*/
|
||||
_extractExamples(doc) {
|
||||
_extractExamples(html) {
|
||||
const examples = [];
|
||||
|
||||
// 尝试多个可能的选择器
|
||||
const selectors = [
|
||||
'.sen_li', // 主要例句容器
|
||||
'.sentences li', // 备选例句
|
||||
'[class*="example"] li', // 包含 example 的类
|
||||
'.content .ex_li' // 通用例句
|
||||
];
|
||||
|
||||
for (const selector of selectors) {
|
||||
const items = doc.querySelectorAll(selector);
|
||||
// 匹配例句模式:英文句子后跟中文翻译
|
||||
// 尝试匹配 <li> 或 <div> 中的例句
|
||||
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
|
||||
|
||||
let match;
|
||||
const seen = new Set();
|
||||
|
||||
while ((match = sentencePattern.exec(html)) !== null) {
|
||||
const sentence = match[1].trim();
|
||||
const translation = match[2].trim();
|
||||
|
||||
for (const item of items) {
|
||||
const enEl = item.querySelector('.sen_en, .en_sent, [class*="english"]');
|
||||
const cnEl = item.querySelector('.sen_cn, .cn_sent, [class*="chinese"]');
|
||||
|
||||
const sentence = enEl?.textContent?.trim() || item.textContent?.trim();
|
||||
const translation = cnEl?.textContent?.trim() || '';
|
||||
|
||||
if (sentence) {
|
||||
examples.push(createExample(sentence, translation));
|
||||
}
|
||||
|
||||
if (examples.length >= 2) break;
|
||||
if (seen.has(sentence)) continue;
|
||||
seen.add(sentence);
|
||||
|
||||
// 验证:英文句子应该包含空格且长度合适
|
||||
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
|
||||
examples.push(createExample(sentence, translation));
|
||||
}
|
||||
|
||||
if (examples.length > 0) break;
|
||||
|
||||
if (examples.length >= 2) break;
|
||||
}
|
||||
|
||||
return examples;
|
||||
|
||||
@ -32,7 +32,6 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`;
|
||||
|
||||
try {
|
||||
// 在 Background 中直接使用 fetch
|
||||
const response = await fetch(url, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
@ -47,12 +46,11 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
|
||||
const html = await response.text();
|
||||
|
||||
// 解析 HTML 提取数据
|
||||
// 使用正则提取数据
|
||||
return this._parseHtml(html, trimmedWord, url);
|
||||
} catch (error) {
|
||||
console.error('[YoudaoDictionary] Search failed:', error);
|
||||
|
||||
// 返回友好错误提示
|
||||
return createResult({
|
||||
word: trimmedWord,
|
||||
phonetic: '',
|
||||
@ -64,7 +62,7 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析有道词典 HTML
|
||||
* 解析有道词典 HTML(使用正则)
|
||||
* @private
|
||||
* @param {string} html - HTML 内容
|
||||
* @param {string} word - 查询的单词
|
||||
@ -72,23 +70,11 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
* @returns {DictionaryResult} 解析结果
|
||||
*/
|
||||
_parseHtml(html, word, url) {
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString(html, 'text/html');
|
||||
|
||||
// 提取音标
|
||||
const phonetic = this._extractPhonetic(doc);
|
||||
|
||||
// 提取释义
|
||||
const meanings = this._extractMeanings(doc);
|
||||
|
||||
// 提取例句
|
||||
const examples = this._extractExamples(doc);
|
||||
|
||||
return createResult({
|
||||
word,
|
||||
phonetic,
|
||||
meanings,
|
||||
examples,
|
||||
phonetic: this._extractPhonetic(html),
|
||||
meanings: this._extractMeanings(html),
|
||||
examples: this._extractExamples(html),
|
||||
url
|
||||
});
|
||||
}
|
||||
@ -96,34 +82,20 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
/**
|
||||
* 提取音标
|
||||
* @private
|
||||
* @param {Document} doc - HTML 文档
|
||||
* @param {string} html - HTML 内容
|
||||
* @returns {string} 音标
|
||||
*/
|
||||
_extractPhonetic(doc) {
|
||||
// 尝试多个可能的选择器
|
||||
const selectors = [
|
||||
'.phonetic', // 主要音标类
|
||||
'.pronounce', // 发音区域
|
||||
'[class*="phonetic"]', // 包含 phonetic 的类
|
||||
'.word-info .phonetic' // 单词信息区的音标
|
||||
];
|
||||
|
||||
for (const selector of selectors) {
|
||||
const elements = doc.querySelectorAll(selector);
|
||||
for (const el of elements) {
|
||||
const text = el.textContent?.trim();
|
||||
if (text && (text.includes('/') || text.includes('['))) {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 正则提取 fallback
|
||||
const bodyText = doc.body?.textContent || '';
|
||||
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
|
||||
_extractPhonetic(html) {
|
||||
// 匹配音标格式
|
||||
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
|
||||
if (match) {
|
||||
return match[0];
|
||||
}
|
||||
|
||||
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
|
||||
if (match2) {
|
||||
return match2[0];
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
@ -131,50 +103,42 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
/**
|
||||
* 提取释义
|
||||
* @private
|
||||
* @param {Document} doc - HTML 文档
|
||||
* @param {string} html - HTML 内容
|
||||
* @returns {Array<Meaning>} 释义列表
|
||||
*/
|
||||
_extractMeanings(doc) {
|
||||
_extractMeanings(html) {
|
||||
const meanings = [];
|
||||
|
||||
// 尝试多个可能的选择器
|
||||
const selectors = [
|
||||
'.trans-container ul li', // 主要释义列表
|
||||
'.basic .word-exp', // 基本释义
|
||||
'.meaning li', // 备选释义
|
||||
'[class*="meaning"] li', // 包含 meaning 的类
|
||||
'.content ul li' // 通用内容列表
|
||||
];
|
||||
|
||||
for (const selector of selectors) {
|
||||
const items = doc.querySelectorAll(selector);
|
||||
|
||||
for (const item of items) {
|
||||
const text = item.textContent?.trim();
|
||||
if (!text) continue;
|
||||
|
||||
// 尝试匹配词性和释义
|
||||
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
|
||||
if (match) {
|
||||
const partOfSpeech = match[1];
|
||||
const defsText = match[2];
|
||||
|
||||
// 分割多个释义
|
||||
const definitions = defsText
|
||||
.split(/[;;]/)
|
||||
.map(d => d.trim())
|
||||
.filter(d => d.length > 0);
|
||||
|
||||
if (definitions.length > 0) {
|
||||
meanings.push(createMeaning(partOfSpeech, definitions));
|
||||
}
|
||||
} else if (text.length > 0 && text.length < 50) {
|
||||
// 没有词性标记的释义
|
||||
meanings.push(createMeaning('', [text]));
|
||||
}
|
||||
// 尝试匹配常见的词典释义格式
|
||||
const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi;
|
||||
|
||||
let match;
|
||||
while ((match = posDefPattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
if (partOfSpeech && definition) {
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
}
|
||||
}
|
||||
|
||||
if (meanings.length > 0) break;
|
||||
// 备选:直接匹配 "词性. 释义" 格式
|
||||
if (meanings.length === 0) {
|
||||
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
|
||||
const seen = new Set();
|
||||
|
||||
while ((match = simplePattern.exec(html)) !== null) {
|
||||
const partOfSpeech = match[1].trim();
|
||||
const definition = match[2].trim();
|
||||
|
||||
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
|
||||
if (definition.length < 2 || definition.length > 30) continue;
|
||||
if (seen.has(definition)) continue;
|
||||
|
||||
seen.add(definition);
|
||||
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||
|
||||
if (meanings.length >= 5) break;
|
||||
}
|
||||
}
|
||||
|
||||
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
|
||||
@ -183,39 +147,30 @@ export class YoudaoDictionary extends DictionaryBase {
|
||||
/**
|
||||
* 提取例句
|
||||
* @private
|
||||
* @param {Document} doc - HTML 文档
|
||||
* @param {string} html - HTML 内容
|
||||
* @returns {Array<Example>} 例句列表
|
||||
*/
|
||||
_extractExamples(doc) {
|
||||
_extractExamples(html) {
|
||||
const examples = [];
|
||||
|
||||
// 尝试多个可能的选择器
|
||||
const selectors = [
|
||||
'.examples li', // 主要例句列表
|
||||
'.example-item', // 例句项
|
||||
'.sentence', // 句子区域
|
||||
'[class*="example"] li', // 包含 example 的类
|
||||
'.content .ex_li' // 通用例句
|
||||
];
|
||||
|
||||
for (const selector of selectors) {
|
||||
const items = doc.querySelectorAll(selector);
|
||||
// 匹配例句模式
|
||||
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
|
||||
|
||||
let match;
|
||||
const seen = new Set();
|
||||
|
||||
while ((match = sentencePattern.exec(html)) !== null) {
|
||||
const sentence = match[1].trim();
|
||||
const translation = match[2].trim();
|
||||
|
||||
for (const item of items) {
|
||||
const enEl = item.querySelector('.en-sentence, .english, [class*="english"]');
|
||||
const cnEl = item.querySelector('.cn-sentence, .chinese, [class*="chinese"]');
|
||||
|
||||
const sentence = enEl?.textContent?.trim() || item.textContent?.trim();
|
||||
const translation = cnEl?.textContent?.trim() || '';
|
||||
|
||||
if (sentence) {
|
||||
examples.push(createExample(sentence, translation));
|
||||
}
|
||||
|
||||
if (examples.length >= 2) break;
|
||||
if (seen.has(sentence)) continue;
|
||||
seen.add(sentence);
|
||||
|
||||
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
|
||||
examples.push(createExample(sentence, translation));
|
||||
}
|
||||
|
||||
if (examples.length > 0) break;
|
||||
|
||||
if (examples.length >= 2) break;
|
||||
}
|
||||
|
||||
return examples;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user