diff --git a/src/shared/dictionary/bing.js b/src/shared/dictionary/bing.js index 3f8f18d..86ccf2c 100644 --- a/src/shared/dictionary/bing.js +++ b/src/shared/dictionary/bing.js @@ -32,7 +32,6 @@ export class BingDictionary extends DictionaryBase { const url = `https://cn.bing.com/dict/search?q=${encodeURIComponent(trimmedWord)}`; try { - // 在 Background 中直接使用 fetch const response = await fetch(url, { method: 'GET', headers: { @@ -47,12 +46,11 @@ export class BingDictionary extends DictionaryBase { const html = await response.text(); - // 解析 HTML 提取数据 + // 使用正则提取数据 return this._parseHtml(html, trimmedWord, url); } catch (error) { console.error('[BingDictionary] Search failed:', error); - // 返回友好错误提示 return createResult({ word: trimmedWord, phonetic: '', @@ -64,7 +62,7 @@ export class BingDictionary extends DictionaryBase { } /** - * 解析必应词典 HTML + * 解析必应词典 HTML(使用正则) * @private * @param {string} html - HTML 内容 * @param {string} word - 查询的单词 @@ -72,23 +70,11 @@ export class BingDictionary extends DictionaryBase { * @returns {DictionaryResult} 解析结果 */ _parseHtml(html, word, url) { - const parser = new DOMParser(); - const doc = parser.parseFromString(html, 'text/html'); - - // 提取音标 - const phonetic = this._extractPhonetic(doc); - - // 提取释义 - const meanings = this._extractMeanings(doc); - - // 提取例句 - const examples = this._extractExamples(doc); - return createResult({ word, - phonetic, - meanings, - examples, + phonetic: this._extractPhonetic(html), + meanings: this._extractMeanings(html), + examples: this._extractExamples(html), url }); } @@ -96,34 +82,21 @@ export class BingDictionary extends DictionaryBase { /** * 提取音标 * @private - * @param {Document} doc - HTML 文档 + * @param {string} html - HTML 内容 * @returns {string} 音标 */ - _extractPhonetic(doc) { - // 尝试多个可能的选择器 - const selectors = [ - '.hd_p1_1F_OWM', // 主要音标容器 - '.hd_tf_lh', // 音标文本 - '[class*="phonetic"]', // 包含 phonetic 的类 - '.prons' // 发音区域 - ]; - - for (const selector of selectors) { - const elements = 
doc.querySelectorAll(selector); - for (const el of elements) { - const text = el.textContent?.trim(); - if (text && text.includes('/')) { - return text; - } - } - } - - // 正则提取 fallback - const bodyText = doc.body?.textContent || ''; - const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/); + _extractPhonetic(html) { + // 匹配音标格式如 [həˈləʊ] 或 /həˈləʊ/ + const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/); if (match) { return match[0]; } + + // 备选:匹配 /.../ 格式 + const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//); + if (match2) { + return match2[0]; + } return ''; } @@ -131,46 +104,44 @@ export class BingDictionary extends DictionaryBase { /** * 提取释义 * @private - * @param {Document} doc - HTML 文档 + * @param {string} html - HTML 内容 * @returns {Array} 释义列表 */ - _extractMeanings(doc) { + _extractMeanings(html) { const meanings = []; - // 尝试多个可能的选择器 - const selectors = [ - '.qdef ul li', // 主要释义列表 - '.def li', // 备选释义 - '[class*="meaning"] li', // 包含 meaning 的类 - '.content ul li' // 通用内容列表 - ]; - - for (const selector of selectors) { - const items = doc.querySelectorAll(selector); - - for (const item of items) { - const text = item.textContent?.trim(); - if (!text) continue; - - // 尝试匹配词性和释义 - const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/); - if (match) { - const partOfSpeech = match[1]; - const defsText = match[2]; - - // 分割多个释义 - const definitions = defsText - .split(/[;;]/) - .map(d => d.trim()) - .filter(d => d.length > 0); - - if (definitions.length > 0) { - meanings.push(createMeaning(partOfSpeech, definitions)); - } - } + // 尝试匹配常见的词典释义格式 + // 格式1: n.定义 + const posDefPattern = /<[^>]*class="[^"]*(?:pos|web)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|tran)[^"]*"[^>]*>([^<]+)/gi; + + let match; + while ((match = posDefPattern.exec(html)) !== null) { + const partOfSpeech = match[1].trim(); + const definition = match[2].trim(); + if (partOfSpeech && definition) { + meanings.push(createMeaning(partOfSpeech, [definition])); } + } - if 
(meanings.length > 0) break; + // 格式2: 直接匹配 "词性. 释义" 格式 + if (meanings.length === 0) { + const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi; + const seen = new Set(); + + while ((match = simplePattern.exec(html)) !== null) { + const partOfSpeech = match[1].trim(); + const definition = match[2].trim(); + + // 过滤无效结果 + if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue; + if (definition.length < 2 || definition.length > 30) continue; + if (seen.has(definition)) continue; + + seen.add(definition); + meanings.push(createMeaning(partOfSpeech, [definition])); + + if (meanings.length >= 5) break; + } } return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])]; @@ -179,38 +150,32 @@ export class BingDictionary extends DictionaryBase { /** * 提取例句 * @private - * @param {Document} doc - HTML 文档 + * @param {string} html - HTML 内容 * @returns {Array} 例句列表 */ - _extractExamples(doc) { + _extractExamples(html) { const examples = []; - // 尝试多个可能的选择器 - const selectors = [ - '.sen_li', // 主要例句容器 - '.sentences li', // 备选例句 - '[class*="example"] li', // 包含 example 的类 - '.content .ex_li' // 通用例句 - ]; - - for (const selector of selectors) { - const items = doc.querySelectorAll(selector); + // 匹配例句模式:英文句子后跟中文翻译 + // 尝试匹配 <li> 中的例句 + const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi; + + let match; + const seen = new Set(); + + while ((match = sentencePattern.exec(html)) !== null) { + const sentence = match[1].trim(); + const translation = match[2].trim(); - for (const item of items) { - const enEl = item.querySelector('.sen_en, .en_sent, [class*="english"]'); - const cnEl = item.querySelector('.sen_cn, .cn_sent, [class*="chinese"]'); - - const sentence = enEl?.textContent?.trim() || item.textContent?.trim(); - const translation = cnEl?.textContent?.trim() || ''; - - if (sentence) { - examples.push(createExample(sentence, translation)); - } - - if (examples.length >= 2) break; + if (seen.has(sentence)) continue; + seen.add(sentence); + + // 验证:英文句子应该包含空格且长度合适 + if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) { + examples.push(createExample(sentence, translation)); } - - if (examples.length > 0) break; + + if (examples.length >= 2) break; } return examples; diff --git a/src/shared/dictionary/youdao.js b/src/shared/dictionary/youdao.js index 6170d62..9555258 100644 --- a/src/shared/dictionary/youdao.js +++ b/src/shared/dictionary/youdao.js @@ -32,7 +32,6 @@ export class YoudaoDictionary extends DictionaryBase { const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`; try { - // 在 Background 中直接使用 fetch const response = await fetch(url, { method: 'GET', headers: { @@ -47,12 +46,11 @@ export class YoudaoDictionary extends DictionaryBase { const html = await response.text(); - // 解析 HTML 提取数据 + // 使用正则提取数据 return this._parseHtml(html, trimmedWord, url); } catch (error) { console.error('[YoudaoDictionary] Search failed:', error); - // 返回友好错误提示 return createResult({ word: trimmedWord, phonetic: '', @@ -64,7 +62,7 @@ export class YoudaoDictionary extends DictionaryBase { } /** - * 解析有道词典 HTML + * 解析有道词典 HTML(使用正则) * @private * @param {string}
html - HTML 内容 * @param {string} word - 查询的单词 @@ -72,23 +70,11 @@ export class YoudaoDictionary extends DictionaryBase { * @returns {DictionaryResult} 解析结果 */ _parseHtml(html, word, url) { - const parser = new DOMParser(); - const doc = parser.parseFromString(html, 'text/html'); - - // 提取音标 - const phonetic = this._extractPhonetic(doc); - - // 提取释义 - const meanings = this._extractMeanings(doc); - - // 提取例句 - const examples = this._extractExamples(doc); - return createResult({ word, - phonetic, - meanings, - examples, + phonetic: this._extractPhonetic(html), + meanings: this._extractMeanings(html), + examples: this._extractExamples(html), url }); } @@ -96,34 +82,20 @@ export class YoudaoDictionary extends DictionaryBase { /** * 提取音标 * @private - * @param {Document} doc - HTML 文档 + * @param {string} html - HTML 内容 * @returns {string} 音标 */ - _extractPhonetic(doc) { - // 尝试多个可能的选择器 - const selectors = [ - '.phonetic', // 主要音标类 - '.pronounce', // 发音区域 - '[class*="phonetic"]', // 包含 phonetic 的类 - '.word-info .phonetic' // 单词信息区的音标 - ]; - - for (const selector of selectors) { - const elements = doc.querySelectorAll(selector); - for (const el of elements) { - const text = el.textContent?.trim(); - if (text && (text.includes('/') || text.includes('['))) { - return text; - } - } - } - - // 正则提取 fallback - const bodyText = doc.body?.textContent || ''; - const match = bodyText.match(/\[[\u0250-\u02AEˈˌ]+\]/); + _extractPhonetic(html) { + // 匹配音标格式 + const match = html.match(/\[[\u0250-\u02AEˈˌa-zA-Z]+\]/); if (match) { return match[0]; } + + const match2 = html.match(/\/[\u0250-\u02AEˈˌa-zA-Z]+\//); + if (match2) { + return match2[0]; + } return ''; } @@ -131,50 +103,42 @@ export class YoudaoDictionary extends DictionaryBase { /** * 提取释义 * @private - * @param {Document} doc - HTML 文档 + * @param {string} html - HTML 内容 * @returns {Array} 释义列表 */ - _extractMeanings(doc) { + _extractMeanings(html) { const meanings = []; - // 尝试多个可能的选择器 - const selectors = [ - '.trans-container 
ul li', // 主要释义列表 - '.basic .word-exp', // 基本释义 - '.meaning li', // 备选释义 - '[class*="meaning"] li', // 包含 meaning 的类 - '.content ul li' // 通用内容列表 - ]; - - for (const selector of selectors) { - const items = doc.querySelectorAll(selector); - - for (const item of items) { - const text = item.textContent?.trim(); - if (!text) continue; - - // 尝试匹配词性和释义 - const match = text.match(/^([a-zA-Z]+\.?)\s*(.+)$/); - if (match) { - const partOfSpeech = match[1]; - const defsText = match[2]; - - // 分割多个释义 - const definitions = defsText - .split(/[;;]/) - .map(d => d.trim()) - .filter(d => d.length > 0); - - if (definitions.length > 0) { - meanings.push(createMeaning(partOfSpeech, definitions)); - } - } else if (text.length > 0 && text.length < 50) { - // 没有词性标记的释义 - meanings.push(createMeaning('', [text])); - } + // 尝试匹配常见的词典释义格式 + const posDefPattern = /<[^>]*class="[^"]*(?:pos|trans)[^"]*"[^>]*>([^<]+)<\/[^>]*>\s*<[^>]*class="[^"]*(?:def|chn)[^"]*"[^>]*>([^<]+)/gi; + + let match; + while ((match = posDefPattern.exec(html)) !== null) { + const partOfSpeech = match[1].trim(); + const definition = match[2].trim(); + if (partOfSpeech && definition) { + meanings.push(createMeaning(partOfSpeech, [definition])); } + } - if (meanings.length > 0) break; + // 备选:直接匹配 "词性. 释义" 格式 + if (meanings.length === 0) { + const simplePattern = /([a-z]+\.?)\s*([^<\n]{2,30})/gi; + const seen = new Set(); + + while ((match = simplePattern.exec(html)) !== null) { + const partOfSpeech = match[1].trim(); + const definition = match[2].trim(); + + if (!partOfSpeech.match(/^[a-z]+\.?$/i)) continue; + if (definition.length < 2 || definition.length > 30) continue; + if (seen.has(definition)) continue; + + seen.add(definition); + meanings.push(createMeaning(partOfSpeech, [definition])); + + if (meanings.length >= 5) break; + } } return meanings.length > 0 ? 
meanings : [createMeaning('n.', ['暂无释义'])]; @@ -183,39 +147,30 @@ export class YoudaoDictionary extends DictionaryBase { /** * 提取例句 * @private - * @param {Document} doc - HTML 文档 + * @param {string} html - HTML 内容 * @returns {Array} 例句列表 */ - _extractExamples(doc) { + _extractExamples(html) { const examples = []; - // 尝试多个可能的选择器 - const selectors = [ - '.examples li', // 主要例句列表 - '.example-item', // 例句项 - '.sentence', // 句子区域 - '[class*="example"] li', // 包含 example 的类 - '.content .ex_li' // 通用例句 - ]; - - for (const selector of selectors) { - const items = doc.querySelectorAll(selector); + // 匹配例句模式 + const sentencePattern = /<[^>]*>([^<]{10,100}[a-zA-Z][^<]{0,50})<\/[^>]*>\s*<[^>]*>([^<]{5,50}[\u4e00-\u9fa5][^<]{0,50})<\/[^>]*>/gi; + + let match; + const seen = new Set(); + + while ((match = sentencePattern.exec(html)) !== null) { + const sentence = match[1].trim(); + const translation = match[2].trim(); - for (const item of items) { - const enEl = item.querySelector('.en-sentence, .english, [class*="english"]'); - const cnEl = item.querySelector('.cn-sentence, .chinese, [class*="chinese"]'); - - const sentence = enEl?.textContent?.trim() || item.textContent?.trim(); - const translation = cnEl?.textContent?.trim() || ''; - - if (sentence) { - examples.push(createExample(sentence, translation)); - } - - if (examples.length >= 2) break; + if (seen.has(sentence)) continue; + seen.add(sentence); + + if (sentence.length > 10 && sentence.length < 150 && sentence.includes(' ')) { + examples.push(createExample(sentence, translation)); } - - if (examples.length > 0) break; + + if (examples.length >= 2) break; } return examples;