fix(M3): 使用正则表达式替代 DOMParser,修复 Service Worker 环境限制
This commit is contained in:
parent
64290b9dd1
commit
a83d5d7bb9
@ -32,7 +32,6 @@ export class BingDictionary extends DictionaryBase {
|
|||||||
const url = `https://cn.bing.com/dict/search?q=${encodeURIComponent(trimmedWord)}`;
|
const url = `https://cn.bing.com/dict/search?q=${encodeURIComponent(trimmedWord)}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 在 Background 中直接使用 fetch
|
|
||||||
const response = await fetch(url, {
|
const response = await fetch(url, {
|
||||||
method: 'GET',
|
method: 'GET',
|
||||||
headers: {
|
headers: {
|
||||||
@ -47,12 +46,11 @@ export class BingDictionary extends DictionaryBase {
|
|||||||
|
|
||||||
const html = await response.text();
|
const html = await response.text();
|
||||||
|
|
||||||
// 解析 HTML 提取数据
|
// 使用正则提取数据
|
||||||
return this._parseHtml(html, trimmedWord, url);
|
return this._parseHtml(html, trimmedWord, url);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[BingDictionary] Search failed:', error);
|
console.error('[BingDictionary] Search failed:', error);
|
||||||
|
|
||||||
// 返回友好错误提示
|
|
||||||
return createResult({
|
return createResult({
|
||||||
word: trimmedWord,
|
word: trimmedWord,
|
||||||
phonetic: '',
|
phonetic: '',
|
||||||
@ -64,7 +62,7 @@ export class BingDictionary extends DictionaryBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 解析必应词典 HTML
|
* 解析必应词典 HTML(使用正则)
|
||||||
* @private
|
* @private
|
||||||
* @param {string} html - HTML 内容
|
* @param {string} html - HTML 内容
|
||||||
* @param {string} word - 查询的单词
|
* @param {string} word - 查询的单词
|
||||||
@ -72,23 +70,11 @@ export class BingDictionary extends DictionaryBase {
|
|||||||
* @returns {DictionaryResult} 解析结果
|
* @returns {DictionaryResult} 解析结果
|
||||||
*/
|
*/
|
||||||
_parseHtml(html, word, url) {
|
_parseHtml(html, word, url) {
|
||||||
const parser = new DOMParser();
|
|
||||||
const doc = parser.parseFromString(html, 'text/html');
|
|
||||||
|
|
||||||
// 提取音标
|
|
||||||
const phonetic = this._extractPhonetic(doc);
|
|
||||||
|
|
||||||
// 提取释义
|
|
||||||
const meanings = this._extractMeanings(doc);
|
|
||||||
|
|
||||||
// 提取例句
|
|
||||||
const examples = this._extractExamples(doc);
|
|
||||||
|
|
||||||
return createResult({
|
return createResult({
|
||||||
word,
|
word,
|
||||||
phonetic,
|
phonetic: this._extractPhonetic(html),
|
||||||
meanings,
|
meanings: this._extractMeanings(html),
|
||||||
examples,
|
examples: this._extractExamples(html),
|
||||||
url
|
url
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -96,34 +82,21 @@ export class BingDictionary extends DictionaryBase {
|
|||||||
/**
|
/**
|
||||||
* 提取音标
|
* 提取音标
|
||||||
* @private
|
* @private
|
||||||
* @param {Document} doc - HTML 文档
|
* @param {string} html - HTML 内容
|
||||||
* @returns {string} 音标
|
* @returns {string} 音标
|
||||||
*/
|
*/
|
||||||
_extractPhonetic(doc) {
|
_extractPhonetic(html) {
|
||||||
// 尝试多个可能的选择器
|
// 匹配音标格式如 [həˈləʊ] 或 /həˈləʊ/
|
||||||
const selectors = [
|
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
|
||||||
'.hd_p1_1F_OWM', // 主要音标容器
|
|
||||||
'.hd_tf_lh', // 音标文本
|
|
||||||
'[class*="phonetic"]', // 包含 phonetic 的类
|
|
||||||
'.prons' // 发音区域
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const selector of selectors) {
|
|
||||||
const elements = doc.querySelectorAll(selector);
|
|
||||||
for (const el of elements) {
|
|
||||||
const text = el.textContent?.trim();
|
|
||||||
if (text && text.includes('/')) {
|
|
||||||
return text;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 正则提取 fallback
|
|
||||||
const bodyText = doc.body?.textContent || '';
|
|
||||||
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
|
|
||||||
if (match) {
|
if (match) {
|
||||||
return match[0];
|
return match[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 备选:匹配 /.../ 格式
|
||||||
|
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
|
||||||
|
if (match2) {
|
||||||
|
return match2[0];
|
||||||
|
}
|
||||||
|
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
@ -131,46 +104,44 @@ export class BingDictionary extends DictionaryBase {
|
|||||||
/**
|
/**
|
||||||
* 提取释义
|
* 提取释义
|
||||||
* @private
|
* @private
|
||||||
* @param {Document} doc - HTML 文档
|
* @param {string} html - HTML 内容
|
||||||
* @returns {Array<Meaning>} 释义列表
|
* @returns {Array<Meaning>} 释义列表
|
||||||
*/
|
*/
|
||||||
_extractMeanings(doc) {
|
_extractMeanings(html) {
|
||||||
const meanings = [];
|
const meanings = [];
|
||||||
|
|
||||||
// 尝试多个可能的选择器
|
// 尝试匹配常见的词典释义格式
|
||||||
const selectors = [
|
// 格式1: <span class="pos">n.</span><span class="def">定义</span>
|
||||||
'.qdef ul li', // 主要释义列表
|
const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi;
|
||||||
'.def li', // 备选释义
|
|
||||||
'[class*="meaning"] li', // 包含 meaning 的类
|
let match;
|
||||||
'.content ul li' // 通用内容列表
|
while ((match = posDefPattern.exec(html)) !== null) {
|
||||||
];
|
const partOfSpeech = match[1].trim();
|
||||||
|
const definition = match[2].trim();
|
||||||
for (const selector of selectors) {
|
if (partOfSpeech && definition) {
|
||||||
const items = doc.querySelectorAll(selector);
|
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||||
|
|
||||||
for (const item of items) {
|
|
||||||
const text = item.textContent?.trim();
|
|
||||||
if (!text) continue;
|
|
||||||
|
|
||||||
// 尝试匹配词性和释义
|
|
||||||
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
|
|
||||||
if (match) {
|
|
||||||
const partOfSpeech = match[1];
|
|
||||||
const defsText = match[2];
|
|
||||||
|
|
||||||
// 分割多个释义
|
|
||||||
const definitions = defsText
|
|
||||||
.split(/[;;]/)
|
|
||||||
.map(d => d.trim())
|
|
||||||
.filter(d => d.length > 0);
|
|
||||||
|
|
||||||
if (definitions.length > 0) {
|
|
||||||
meanings.push(createMeaning(partOfSpeech, definitions));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (meanings.length > 0) break;
|
// 格式2: 直接匹配 "词性. 释义" 格式
|
||||||
|
if (meanings.length === 0) {
|
||||||
|
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
|
||||||
|
const seen = new Set();
|
||||||
|
|
||||||
|
while ((match = simplePattern.exec(html)) !== null) {
|
||||||
|
const partOfSpeech = match[1].trim();
|
||||||
|
const definition = match[2].trim();
|
||||||
|
|
||||||
|
// 过滤无效结果
|
||||||
|
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
|
||||||
|
if (definition.length < 2 || definition.length > 30) continue;
|
||||||
|
if (seen.has(definition)) continue;
|
||||||
|
|
||||||
|
seen.add(definition);
|
||||||
|
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||||
|
|
||||||
|
if (meanings.length >= 5) break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
|
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
|
||||||
@ -179,38 +150,32 @@ export class BingDictionary extends DictionaryBase {
|
|||||||
/**
|
/**
|
||||||
* 提取例句
|
* 提取例句
|
||||||
* @private
|
* @private
|
||||||
* @param {Document} doc - HTML 文档
|
* @param {string} html - HTML 内容
|
||||||
* @returns {Array<Example>} 例句列表
|
* @returns {Array<Example>} 例句列表
|
||||||
*/
|
*/
|
||||||
_extractExamples(doc) {
|
_extractExamples(html) {
|
||||||
const examples = [];
|
const examples = [];
|
||||||
|
|
||||||
// 尝试多个可能的选择器
|
// 匹配例句模式:英文句子后跟中文翻译
|
||||||
const selectors = [
|
// 尝试匹配 <li> 或 <div> 中的例句
|
||||||
'.sen_li', // 主要例句容器
|
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
|
||||||
'.sentences li', // 备选例句
|
|
||||||
'[class*="example"] li', // 包含 example 的类
|
let match;
|
||||||
'.content .ex_li' // 通用例句
|
const seen = new Set();
|
||||||
];
|
|
||||||
|
while ((match = sentencePattern.exec(html)) !== null) {
|
||||||
for (const selector of selectors) {
|
const sentence = match[1].trim();
|
||||||
const items = doc.querySelectorAll(selector);
|
const translation = match[2].trim();
|
||||||
|
|
||||||
for (const item of items) {
|
if (seen.has(sentence)) continue;
|
||||||
const enEl = item.querySelector('.sen_en, .en_sent, [class*="english"]');
|
seen.add(sentence);
|
||||||
const cnEl = item.querySelector('.sen_cn, .cn_sent, [class*="chinese"]');
|
|
||||||
|
// 验证:英文句子应该包含空格且长度合适
|
||||||
const sentence = enEl?.textContent?.trim() || item.textContent?.trim();
|
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
|
||||||
const translation = cnEl?.textContent?.trim() || '';
|
examples.push(createExample(sentence, translation));
|
||||||
|
|
||||||
if (sentence) {
|
|
||||||
examples.push(createExample(sentence, translation));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (examples.length >= 2) break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (examples.length > 0) break;
|
if (examples.length >= 2) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return examples;
|
return examples;
|
||||||
|
|||||||
@ -32,7 +32,6 @@ export class YoudaoDictionary extends DictionaryBase {
|
|||||||
const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`;
|
const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 在 Background 中直接使用 fetch
|
|
||||||
const response = await fetch(url, {
|
const response = await fetch(url, {
|
||||||
method: 'GET',
|
method: 'GET',
|
||||||
headers: {
|
headers: {
|
||||||
@ -47,12 +46,11 @@ export class YoudaoDictionary extends DictionaryBase {
|
|||||||
|
|
||||||
const html = await response.text();
|
const html = await response.text();
|
||||||
|
|
||||||
// 解析 HTML 提取数据
|
// 使用正则提取数据
|
||||||
return this._parseHtml(html, trimmedWord, url);
|
return this._parseHtml(html, trimmedWord, url);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[YoudaoDictionary] Search failed:', error);
|
console.error('[YoudaoDictionary] Search failed:', error);
|
||||||
|
|
||||||
// 返回友好错误提示
|
|
||||||
return createResult({
|
return createResult({
|
||||||
word: trimmedWord,
|
word: trimmedWord,
|
||||||
phonetic: '',
|
phonetic: '',
|
||||||
@ -64,7 +62,7 @@ export class YoudaoDictionary extends DictionaryBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 解析有道词典 HTML
|
* 解析有道词典 HTML(使用正则)
|
||||||
* @private
|
* @private
|
||||||
* @param {string} html - HTML 内容
|
* @param {string} html - HTML 内容
|
||||||
* @param {string} word - 查询的单词
|
* @param {string} word - 查询的单词
|
||||||
@ -72,23 +70,11 @@ export class YoudaoDictionary extends DictionaryBase {
|
|||||||
* @returns {DictionaryResult} 解析结果
|
* @returns {DictionaryResult} 解析结果
|
||||||
*/
|
*/
|
||||||
_parseHtml(html, word, url) {
|
_parseHtml(html, word, url) {
|
||||||
const parser = new DOMParser();
|
|
||||||
const doc = parser.parseFromString(html, 'text/html');
|
|
||||||
|
|
||||||
// 提取音标
|
|
||||||
const phonetic = this._extractPhonetic(doc);
|
|
||||||
|
|
||||||
// 提取释义
|
|
||||||
const meanings = this._extractMeanings(doc);
|
|
||||||
|
|
||||||
// 提取例句
|
|
||||||
const examples = this._extractExamples(doc);
|
|
||||||
|
|
||||||
return createResult({
|
return createResult({
|
||||||
word,
|
word,
|
||||||
phonetic,
|
phonetic: this._extractPhonetic(html),
|
||||||
meanings,
|
meanings: this._extractMeanings(html),
|
||||||
examples,
|
examples: this._extractExamples(html),
|
||||||
url
|
url
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -96,34 +82,20 @@ export class YoudaoDictionary extends DictionaryBase {
|
|||||||
/**
|
/**
|
||||||
* 提取音标
|
* 提取音标
|
||||||
* @private
|
* @private
|
||||||
* @param {Document} doc - HTML 文档
|
* @param {string} html - HTML 内容
|
||||||
* @returns {string} 音标
|
* @returns {string} 音标
|
||||||
*/
|
*/
|
||||||
_extractPhonetic(doc) {
|
_extractPhonetic(html) {
|
||||||
// 尝试多个可能的选择器
|
// 匹配音标格式
|
||||||
const selectors = [
|
const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/);
|
||||||
'.phonetic', // 主要音标类
|
|
||||||
'.pronounce', // 发音区域
|
|
||||||
'[class*="phonetic"]', // 包含 phonetic 的类
|
|
||||||
'.word-info .phonetic' // 单词信息区的音标
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const selector of selectors) {
|
|
||||||
const elements = doc.querySelectorAll(selector);
|
|
||||||
for (const el of elements) {
|
|
||||||
const text = el.textContent?.trim();
|
|
||||||
if (text && (text.includes('/') || text.includes('['))) {
|
|
||||||
return text;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 正则提取 fallback
|
|
||||||
const bodyText = doc.body?.textContent || '';
|
|
||||||
const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/);
|
|
||||||
if (match) {
|
if (match) {
|
||||||
return match[0];
|
return match[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//);
|
||||||
|
if (match2) {
|
||||||
|
return match2[0];
|
||||||
|
}
|
||||||
|
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
@ -131,50 +103,42 @@ export class YoudaoDictionary extends DictionaryBase {
|
|||||||
/**
|
/**
|
||||||
* 提取释义
|
* 提取释义
|
||||||
* @private
|
* @private
|
||||||
* @param {Document} doc - HTML 文档
|
* @param {string} html - HTML 内容
|
||||||
* @returns {Array<Meaning>} 释义列表
|
* @returns {Array<Meaning>} 释义列表
|
||||||
*/
|
*/
|
||||||
_extractMeanings(doc) {
|
_extractMeanings(html) {
|
||||||
const meanings = [];
|
const meanings = [];
|
||||||
|
|
||||||
// 尝试多个可能的选择器
|
// 尝试匹配常见的词典释义格式
|
||||||
const selectors = [
|
const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi;
|
||||||
'.trans-container ul li', // 主要释义列表
|
|
||||||
'.basic .word-exp', // 基本释义
|
let match;
|
||||||
'.meaning li', // 备选释义
|
while ((match = posDefPattern.exec(html)) !== null) {
|
||||||
'[class*="meaning"] li', // 包含 meaning 的类
|
const partOfSpeech = match[1].trim();
|
||||||
'.content ul li' // 通用内容列表
|
const definition = match[2].trim();
|
||||||
];
|
if (partOfSpeech && definition) {
|
||||||
|
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||||
for (const selector of selectors) {
|
|
||||||
const items = doc.querySelectorAll(selector);
|
|
||||||
|
|
||||||
for (const item of items) {
|
|
||||||
const text = item.textContent?.trim();
|
|
||||||
if (!text) continue;
|
|
||||||
|
|
||||||
// 尝试匹配词性和释义
|
|
||||||
const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/);
|
|
||||||
if (match) {
|
|
||||||
const partOfSpeech = match[1];
|
|
||||||
const defsText = match[2];
|
|
||||||
|
|
||||||
// 分割多个释义
|
|
||||||
const definitions = defsText
|
|
||||||
.split(/[;;]/)
|
|
||||||
.map(d => d.trim())
|
|
||||||
.filter(d => d.length > 0);
|
|
||||||
|
|
||||||
if (definitions.length > 0) {
|
|
||||||
meanings.push(createMeaning(partOfSpeech, definitions));
|
|
||||||
}
|
|
||||||
} else if (text.length > 0 && text.length < 50) {
|
|
||||||
// 没有词性标记的释义
|
|
||||||
meanings.push(createMeaning('', [text]));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (meanings.length > 0) break;
|
// 备选:直接匹配 "词性. 释义" 格式
|
||||||
|
if (meanings.length === 0) {
|
||||||
|
const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi;
|
||||||
|
const seen = new Set();
|
||||||
|
|
||||||
|
while ((match = simplePattern.exec(html)) !== null) {
|
||||||
|
const partOfSpeech = match[1].trim();
|
||||||
|
const definition = match[2].trim();
|
||||||
|
|
||||||
|
if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue;
|
||||||
|
if (definition.length < 2 || definition.length > 30) continue;
|
||||||
|
if (seen.has(definition)) continue;
|
||||||
|
|
||||||
|
seen.add(definition);
|
||||||
|
meanings.push(createMeaning(partOfSpeech, [definition]));
|
||||||
|
|
||||||
|
if (meanings.length >= 5) break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
|
return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
|
||||||
@ -183,39 +147,30 @@ export class YoudaoDictionary extends DictionaryBase {
|
|||||||
/**
|
/**
|
||||||
* 提取例句
|
* 提取例句
|
||||||
* @private
|
* @private
|
||||||
* @param {Document} doc - HTML 文档
|
* @param {string} html - HTML 内容
|
||||||
* @returns {Array<Example>} 例句列表
|
* @returns {Array<Example>} 例句列表
|
||||||
*/
|
*/
|
||||||
_extractExamples(doc) {
|
_extractExamples(html) {
|
||||||
const examples = [];
|
const examples = [];
|
||||||
|
|
||||||
// 尝试多个可能的选择器
|
// 匹配例句模式
|
||||||
const selectors = [
|
const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi;
|
||||||
'.examples li', // 主要例句列表
|
|
||||||
'.example-item', // 例句项
|
let match;
|
||||||
'.sentence', // 句子区域
|
const seen = new Set();
|
||||||
'[class*="example"] li', // 包含 example 的类
|
|
||||||
'.content .ex_li' // 通用例句
|
while ((match = sentencePattern.exec(html)) !== null) {
|
||||||
];
|
const sentence = match[1].trim();
|
||||||
|
const translation = match[2].trim();
|
||||||
for (const selector of selectors) {
|
|
||||||
const items = doc.querySelectorAll(selector);
|
|
||||||
|
|
||||||
for (const item of items) {
|
if (seen.has(sentence)) continue;
|
||||||
const enEl = item.querySelector('.en-sentence, .english, [class*="english"]');
|
seen.add(sentence);
|
||||||
const cnEl = item.querySelector('.cn-sentence, .chinese, [class*="chinese"]');
|
|
||||||
|
if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) {
|
||||||
const sentence = enEl?.textContent?.trim() || item.textContent?.trim();
|
examples.push(createExample(sentence, translation));
|
||||||
const translation = cnEl?.textContent?.trim() || '';
|
|
||||||
|
|
||||||
if (sentence) {
|
|
||||||
examples.push(createExample(sentence, translation));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (examples.length >= 2) break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (examples.length > 0) break;
|
if (examples.length >= 2) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return examples;
|
return examples;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user