228 lines
6.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* @file 有道词典实现
* @description 有道词典的实现,通过 HTTP 请求获取数据
*/
import { DictionaryBase, createResult, createMeaning, createExample } from './base.js';
/**
 * Youdao dictionary provider.
 *
 * Downloads the Youdao result page for a word over HTTP and scrapes the
 * phonetic transcription, the definitions and a few example sentences out of
 * the returned HTML with regular expressions.
 */
export class YoudaoDictionary extends DictionaryBase {
  /**
   * @param {object} [config] - Overrides spread on top of the built-in
   *   `name`, `icon` and `languages` values before they are passed to the
   *   `DictionaryBase` constructor.
   */
  constructor(config = {}) {
    super({
      name: '有道词典',
      icon: 'icons/youdao.png',
      languages: ['en', 'zh'],
      ...config
    });
  }

  /**
   * Look up a word.
   *
   * A failed request (network error, non-2xx status, or an exception while
   * parsing) is logged and converted into a placeholder result, so the
   * returned promise only rejects for invalid input.
   *
   * @param {string} word - The word to look up.
   * @returns {Promise<DictionaryResult>} The lookup result.
   * @throws {Error} (as a rejection) when `word` is not a string or is blank.
   */
  async search(word) {
    // Check the type explicitly: calling `.trim()` on a non-string would raise
    // an unrelated TypeError instead of the intended validation error.
    if (typeof word !== 'string' || !word.trim()) {
      throw new Error('查询单词不能为空');
    }
    const trimmedWord = word.trim();
    const url = `https://dict.youdao.com/result?word=${encodeURIComponent(trimmedWord)}&lang=en`;
    try {
      const response = await fetch(url, {
        method: 'GET',
        headers: {
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'
        }
      });
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const html = await response.text();
      return this._parseHtml(html, trimmedWord, url);
    } catch (error) {
      console.error('[YoudaoDictionary] Search failed:', error);
      // Best effort: hand back a result carrying a hint instead of rejecting.
      return createResult({
        word: trimmedWord,
        phonetic: '',
        meanings: [createMeaning('提示', ['查询失败,请检查网络连接'])],
        examples: [],
        url
      });
    }
  }

  /**
   * Replace every HTML tag with a space and collapse whitespace.
   * HTML entities are left exactly as they appear in the input.
   *
   * @private
   * @param {string} html - String that may contain HTML tags.
   * @returns {string} Trimmed text with single spaces between words.
   */
  _stripHtml(html) {
    return html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  }

  /**
   * Build a result object from a Youdao result page.
   *
   * @private
   * @param {string} html - Page HTML.
   * @param {string} word - The word that was looked up.
   * @param {string} url - The URL the page was fetched from.
   * @returns {DictionaryResult} The parsed result.
   */
  _parseHtml(html, word, url) {
    return createResult({
      word,
      phonetic: this._extractPhonetic(html),
      meanings: this._extractMeanings(html),
      examples: this._extractExamples(html),
      url
    });
  }

  /**
   * Extract the phonetic transcription.
   *
   * @private
   * @param {string} html - Page HTML.
   * @returns {string} The transcription, or '' when none is found.
   */
  _extractPhonetic(html) {
    // Tried in order: bracketed text inside a span whose class contains
    // "phonetic" or "pron", then any text inside a span whose class contains
    // "phone".
    const patterns = [
      /<span[^>]*class="[^"]*phonetic[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
      /<span[^>]*class="[^"]*pron[^"]*"[^>]*>(\[[^\]]+\])<\/span>/i,
      /<span[^>]*class="[^"]*phone[^"]*"[^>]*>([^<]+)<\/span>/i
    ];
    for (const pattern of patterns) {
      const match = html.match(pattern);
      if (match) {
        return match[1].trim();
      }
    }
    // Generic fallback: the first bracketed run of IPA-looking characters.
    // Besides the IPA Extensions block and the two stress marks, accept
    // U+02D0 (length mark), U+00E6 (ae), U+00F0 (eth), U+014B (eng) and
    // U+03B8 (theta), which are common in English transcriptions but live
    // outside U+0250-U+02AE.
    const genericMatch = html.match(/(\[[\u0250-\u02AEˈˌ\u02D0\u00E6\u00F0\u014B\u03B8a-zA-Z\s]+\])/);
    if (genericMatch) {
      return genericMatch[1];
    }
    return '';
  }

  /**
   * Extract the definitions.
   *
   * @private
   * @param {string} html - Page HTML.
   * @returns {Array<Meaning>} One entry per distinct part-of-speech/definition
   *   pair; a single placeholder entry when nothing could be extracted.
   */
  _extractMeanings(html) {
    const meanings = [];
    const seen = new Set();
    // Record a definition once per distinct (part of speech, text) pair.
    const addMeaning = (pos, def) => {
      const key = `${pos}-${def}`;
      if (seen.has(key)) {
        return;
      }
      seen.add(key);
      meanings.push(createMeaning(pos, [def]));
    };

    // Format 1, the standard layout:
    // <li><span class="pos">n.</span> <span class="trans">definition</span></li>
    const defPattern = /<li[^>]*>\s*<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>\s*<span[^>]*class="[^"]*trans[^"]*"[^>]*>([^<]+)<\/span>/gi;
    for (const match of html.matchAll(defPattern)) {
      const pos = this._stripHtml(match[1]);
      const def = this._stripHtml(match[2]);
      if (pos && def) {
        addMeaning(pos, def);
      }
    }

    // Format 2, fallback: every <li> up to the first </ul> that follows a
    // "trans-container" div. The `s` flag lets `.` cross line breaks, which
    // real pages contain between these tags.
    if (meanings.length === 0) {
      const containerMatch = html.match(/<div[^>]*class="[^"]*trans-container[^"]*"[^>]*>(.*?)<\/ul>/is);
      if (containerMatch) {
        for (const liMatch of containerMatch[1].matchAll(/<li[^>]*>(.*?)<\/li>/gis)) {
          const content = liMatch[1];
          const posMatch = content.match(/<span[^>]*class="[^"]*pos[^"]*"[^>]*>([^<]+)<\/span>/i);
          const transMatch = content.match(/<span[^>]*class="[^"]*(?:trans|chn)[^"]*"[^>]*>([^<]+)<\/span>/i);
          // Items without a part-of-speech span are labelled 'n.'.
          const pos = posMatch ? this._stripHtml(posMatch[1]) : 'n.';
          // Without a dedicated translation span, use the item's whole text.
          const def = this._stripHtml(transMatch ? transMatch[1] : content);
          if (def) {
            addMeaning(pos, def);
          }
        }
      }
    }

    return meanings.length > 0 ? meanings : [createMeaning('n.', ['暂无释义'])];
  }

  /**
   * Extract example sentences.
   *
   * Expected markup (may span several lines):
   *   <li class="examples_li">
   *     <p class="examples_p">English sentence</p>
   *     <p class="examples_p">Chinese translation</p>
   *   </li>
   *
   * @private
   * @param {string} html - Page HTML.
   * @param {number} [maxExamples=2] - Maximum number of examples to return.
   * @returns {Array<Example>} Distinct examples in page order.
   */
  _extractExamples(html, maxExamples = 2) {
    const examples = [];
    const seen = new Set();
    // Both patterns use the `s` flag so that `.` also matches the line breaks
    // found inside real example blocks.
    const senPattern = /<li[^>]*class="[^"]*examples_li[^"]*"[^>]*>(.*?)<\/li>/gis;
    const pPattern = /<p[^>]*class="[^"]*examples_p[^"]*"[^>]*>(.*?)<\/p>/gis;

    for (const senMatch of html.matchAll(senPattern)) {
      if (examples.length >= maxExamples) {
        break;
      }
      const paragraphs = [...senMatch[1].matchAll(pPattern)];
      // An example needs both the sentence and its translation.
      if (paragraphs.length < 2) {
        continue;
      }
      const sentence = this._stripHtml(paragraphs[0][1]);
      const translation = this._stripHtml(paragraphs[1][1]);
      // A leftover '<' means tag stripping did not fully clean the text.
      if (sentence && !sentence.includes('<') && !seen.has(sentence)) {
        seen.add(sentence);
        examples.push(createExample(sentence, translation));
      }
    }
    return examples;
  }
}
// Shared instance exported for reuse, constructed with the default configuration.
export const youdaoDictionary = new YoudaoDictionary();