perf(extension): avoid heavy page detection on startup (#1382)

This commit is contained in:
MengXi 2026-04-24 19:59:35 -07:00 committed by GitHub
commit 068bdecc8a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 442 additions and 1002 deletions

View file

@ -0,0 +1,5 @@
---
"@read-frog/extension": patch
---
perf: replace startup Readability parsing with lightweight page detection

View file

@ -46,7 +46,6 @@ Master languages effortlessly and deeply with AI, right in your browser.
- [🤖 20+ AI Providers](#-20-ai-providers)
- [🎬 Subtitle Translation](#-subtitle-translation)
- [🔊 Text-to-Speech (TTS)](#-text-to-speech-tts)
- [📖 Read Article](#-read-article)
- [🤝 Contribute](#-contribute)
- [Contribute Code](#contribute-code)
- [📜 Commercial License Grant](#-commercial-license-grant)
@ -134,7 +133,7 @@ The extension automatically re-translates all visible content when you switch mo
### 🧠 [Context-Aware Translation][docs-tutorial]
Enable AI to understand the full context of what you're reading. When activated, Read Frog uses Mozilla's Readability library to extract the article's title and content, providing this context to the AI for more accurate, contextually-appropriate translations.
Enable AI to understand the full context of what you're reading. When activated, Read Frog extracts the page title and a concise Markdown version of the page content, providing this context to the AI for more accurate, contextually-appropriate translations.
This means technical terms get translated correctly within their domain, literary expressions maintain their nuance, and ambiguous phrases are interpreted based on the surrounding content rather than in isolation.
@ -226,20 +225,6 @@ Automatic language detection (basic or LLM-powered) with per-language voice mapp
</div>
<!-- ![][image-feat-read] -->
### 📖 [Read Article][docs-tutorial]
One-click deep article analysis. Read Frog extracts the main content using Mozilla's Readability, detects the source language, and generates a summary and introduction in your target language.
Then it provides sentence-by-sentence translations with vocabulary explanations tailored to your language level (beginner, intermediate, or advanced). Each sentence includes key word definitions, grammatical analysis, and contextual explanations. It's like having a personal language tutor analyze every article you read.
<div align="right">
[![Back to top][back-to-top]](#readme-top)
</div>
## 🤝 Contribute
Contributions of all types are more than welcome.

View file

@ -46,7 +46,6 @@
- [🤖 20+ AI 服务商](#-20-ai-服务商)
- [🎬 字幕翻译](#-字幕翻译)
- [🔊 文字转语音 (TTS)](#-文字转语音-tts)
- [📖 阅读文章](#-阅读文章)
- [🤝 贡献](#-贡献)
- [贡献代码](#贡献代码)
- [📜 商业授权](#-商业授权)
@ -134,7 +133,7 @@ Read Frog 的愿景是为各个级别的语言学习者提供易于使用、智
### 🧠 [上下文感知翻译][docs-tutorial]
让 AI 理解您正在阅读内容的完整上下文。启用后，Read Frog 使用 Mozilla 的 Readability 库提取文章的标题和内容，将此上下文提供给 AI，以获得更准确、更符合语境的翻译。
让 AI 理解您正在阅读内容的完整上下文。启用后，Read Frog 会提取页面标题和简洁的 Markdown 页面内容，将此上下文提供给 AI，以获得更准确、更符合语境的翻译。
这意味着技术术语会在其领域内被正确翻译,文学表达会保持其韵味,歧义短语会根据周围内容而非孤立地进行解释。
@ -226,20 +225,6 @@ Read Frog 的愿景是为各个级别的语言学习者提供易于使用、智
</div>
<!-- ![][image-feat-read] -->
### 📖 [阅读文章][docs-tutorial]
一键深度文章分析。Read Frog 使用 Mozilla 的 Readability 提取主要内容,检测源语言,并用您的目标语言生成摘要和导读。
然后提供逐句翻译,配合根据您的语言水平(初级、中级或高级)定制的词汇解释。每个句子都包含关键词定义、语法分析和上下文解释。就像有一位私人语言导师分析您阅读的每篇文章。
<div align="right">
[![Back to top][back-to-top]](#readme-top)
</div>
## 🤝 贡献
我们欢迎各种类型的贡献。

View file

@ -66,7 +66,6 @@
"@headless-tree/react": "^1.6.3",
"@json-render/core": "^0.18.0",
"@json-render/react": "^0.18.0",
"@mozilla/readability": "^0.6.0",
"@openrouter/ai-sdk-provider": "^2.8.0",
"@orpc/client": "^1.14.0",
"@orpc/tanstack-query": "^1.14.0",
@ -93,6 +92,7 @@
"css-tree": "^3.2.1",
"debounce": "^3.0.0",
"deepmerge-ts": "^7.1.5",
"defuddle": "^0.18.1",
"dequal": "^2.0.3",
"dexie": "^4.4.2",
"file-saver": "^2.0.5",

70
pnpm-lock.yaml generated
View file

@ -116,9 +116,6 @@ importers:
'@json-render/react':
specifier: ^0.18.0
version: 0.18.0(react@19.2.5)(zod@4.3.6)
'@mozilla/readability':
specifier: ^0.6.0
version: 0.6.0
'@openrouter/ai-sdk-provider':
specifier: ^2.8.0
version: 2.8.0(ai@6.0.168(zod@4.3.6))(zod@4.3.6)
@ -197,6 +194,9 @@ importers:
deepmerge-ts:
specifier: ^7.1.5
version: 7.1.5
defuddle:
specifier: ^0.18.1
version: 0.18.1
dequal:
specifier: ^2.0.3
version: 2.0.3
@ -1671,6 +1671,9 @@ packages:
'@marijn/find-cluster-break@1.0.2':
resolution: {integrity: sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==}
'@mixmark-io/domino@2.2.0':
resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==}
'@modelcontextprotocol/sdk@1.29.0':
resolution: {integrity: sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==}
engines: {node: '>=18'}
@ -1684,10 +1687,6 @@ packages:
'@mongodb-js/saslprep@1.4.9':
resolution: {integrity: sha512-RXSxsokhAF/4nWys8An8npsqOI33Ex1Hlzqjw2pZOO+GKtMAR2noGnUdsFiGwsaO/xXI+56mtjTmDA3JXJsvmA==}
'@mozilla/readability@0.6.0':
resolution: {integrity: sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==}
engines: {node: '>=14.0.0'}
'@mswjs/interceptors@0.41.4':
resolution: {integrity: sha512-3B9EinUkrdOUGYzHRzRWSXunQ4YFGboJnyLNRwEJWEde+j8fNhPUHvrN1E3g1DU/iS/s8JQrMNVe+S7AHHVs0w==}
engines: {node: '>=18'}
@ -3264,6 +3263,10 @@ packages:
'@wxt-dev/storage@1.2.8':
resolution: {integrity: sha512-GWCFKgF5+d7eslOxUDFC70ypA9njupmJb1nQM8uZoX0J3sWT2BO5xJLzb1sYahWAfID9p2BMtnUBN1lkWxPsbQ==}
'@xmldom/xmldom@0.8.13':
resolution: {integrity: sha512-KRYzxepc14G/CEpEGc3Yn+JKaAeT63smlDr+vjB8jRfgTBBI9wRj/nkQEO+ucV8p8I9bfKLWp37uHgFrbntPvw==}
engines: {node: '>=10.0.0'}
'@yarnpkg/lockfile@1.1.0':
resolution: {integrity: sha512-GpSwvyXOcOOlV70vbnzjj4fW5xW/FdUF6nQEt1ENy7m4ZCczi1+/buVUPAqmGfqznsORNFzUMjctTIp8a9tuCQ==}
@ -3770,6 +3773,10 @@ packages:
resolution: {integrity: sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ==}
engines: {node: '>=16'}
commander@12.1.0:
resolution: {integrity: sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==}
engines: {node: '>=18'}
commander@14.0.3:
resolution: {integrity: sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw==}
engines: {node: '>=20'}
@ -4062,6 +4069,10 @@ packages:
defu@6.1.7:
resolution: {integrity: sha512-7z22QmUWiQ/2d0KkdYmANbRUVABpZ9SNYyH5vx6PZ+nE5bcC0l7uFvEfHlyld/HcGBFTL536ClDt3DEcSlEJAQ==}
defuddle@0.18.1:
resolution: {integrity: sha512-AvFPFOsoDjt5xUOA1QxzafSSzJ5dqEIC63yO72tHYtSjj1DYY/XM0XTPUCsHkm5A2f1X9ulBvoSVFJrd4s2ckA==}
hasBin: true
delayed-stream@1.0.0:
resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==}
engines: {node: '>=0.4.0'}
@ -5634,6 +5645,9 @@ packages:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'}
mathml-to-latex@1.5.0:
resolution: {integrity: sha512-rrWn0eEvcEcdMM4xfHcSGIy+i01DX9byOdXTLWg+w1iJ6O6ohP5UXY1dVzNUZLhzfl3EGcRekWLhY7JT5Omaew==}
mdast-util-find-and-replace@3.0.2:
resolution: {integrity: sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==}
@ -7056,6 +7070,10 @@ packages:
resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==}
engines: {node: '>=6'}
temml@0.13.2:
resolution: {integrity: sha512-n8fDRSsLscq9nh9j6z+FgkCvFMT0IJm6GCgwfzh+7AHT3Sfb4jFTQlsA6hVcF2dYYr3b66oDBVES95RfoukyrA==}
engines: {node: '>=18.13.0'}
term-size@2.2.1:
resolution: {integrity: sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg==}
engines: {node: '>=8'}
@ -7182,6 +7200,10 @@ packages:
tslib@2.8.1:
resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==}
turndown@7.2.4:
resolution: {integrity: sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==}
engines: {node: '>=18', npm: '>=9'}
tw-animate-css@1.4.0:
resolution: {integrity: sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==}
@ -9140,6 +9162,9 @@ snapshots:
'@marijn/find-cluster-break@1.0.2': {}
'@mixmark-io/domino@2.2.0':
optional: true
'@modelcontextprotocol/sdk@1.29.0(zod@3.25.76)':
dependencies:
'@hono/node-server': 1.19.14(hono@4.12.14)
@ -9167,8 +9192,6 @@ snapshots:
sparse-bitfield: 3.0.3
optional: true
'@mozilla/readability@0.6.0': {}
'@mswjs/interceptors@0.41.4':
dependencies:
'@open-draft/deferred-promise': 2.2.0
@ -10640,6 +10663,9 @@ snapshots:
async-mutex: 0.5.0
dequal: 2.0.3
'@xmldom/xmldom@0.8.13':
optional: true
'@yarnpkg/lockfile@1.1.0': {}
'@yarnpkg/parsers@3.0.2':
@ -11093,6 +11119,8 @@ snapshots:
commander@11.1.0: {}
commander@12.1.0: {}
commander@14.0.3: {}
commander@2.9.0:
@ -11327,6 +11355,17 @@ snapshots:
defu@6.1.7: {}
defuddle@0.18.1:
dependencies:
commander: 12.1.0
optionalDependencies:
linkedom: 0.18.12
mathml-to-latex: 1.5.0
temml: 0.13.2
turndown: 7.2.4
transitivePeerDependencies:
- canvas
delayed-stream@1.0.0: {}
depd@2.0.0: {}
@ -12931,6 +12970,11 @@ snapshots:
math-intrinsics@1.1.0: {}
mathml-to-latex@1.5.0:
dependencies:
'@xmldom/xmldom': 0.8.13
optional: true
mdast-util-find-and-replace@3.0.2:
dependencies:
'@types/mdast': 4.0.4
@ -14654,6 +14698,9 @@ snapshots:
inherits: 2.0.4
readable-stream: 3.6.2
temml@0.13.2:
optional: true
term-size@2.2.1: {}
thread-stream@3.1.0:
@ -14758,6 +14805,11 @@ snapshots:
tslib@2.8.1: {}
turndown@7.2.4:
dependencies:
'@mixmark-io/domino': 2.2.0
optional: true
tw-animate-css@1.4.0: {}
type-check@0.4.0:

View file

@ -3,7 +3,7 @@ import type { LangCodeISO6393 } from "@read-frog/definitions"
import type { Config } from "@/types/config/config"
import { storage } from "#imports"
import { DEFAULT_CONFIG, DETECTED_CODE_STORAGE_KEY } from "@/utils/constants/config"
import { getDocumentInfo } from "@/utils/content/analyze"
import { detectPageLanguageLightweight } from "@/utils/content/page-language"
import { ensurePresetStyles } from "@/utils/host/translate/ui/style-injector"
import { logger } from "@/utils/logger"
import { onMessage, sendMessage } from "@/utils/message"
@ -55,7 +55,7 @@ export async function bootstrapHostContent(ctx: ContentScriptContext, initialCon
}
// Only the top frame should detect and set language to avoid race conditions from iframes
if (window === window.top) {
const { detectedCodeOrUnd } = await getDocumentInfo()
const { detectedCodeOrUnd } = await detectPageLanguageLightweight()
const detectedCode: LangCodeISO6393 = detectedCodeOrUnd === "und" ? "eng" : detectedCodeOrUnd
await storage.setItem<LangCodeISO6393>(`local:${DETECTED_CODE_STORAGE_KEY}`, detectedCode)
// Notify background script that URL has changed, let it decide whether to automatically enable translation
@ -92,7 +92,7 @@ export async function bootstrapHostContent(ctx: ContentScriptContext, initialCon
// Only the top frame should detect and set language to avoid race conditions from iframes
if (window === window.top) {
const { detectedCodeOrUnd } = await getDocumentInfo()
const { detectedCodeOrUnd } = await detectPageLanguageLightweight()
const initialDetectedCode: LangCodeISO6393 = detectedCodeOrUnd === "und" ? "eng" : detectedCodeOrUnd
await storage.setItem<LangCodeISO6393>(`local:${DETECTED_CODE_STORAGE_KEY}`, initialDetectedCode)

View file

@ -1,190 +0,0 @@
const TRAILING_PUNCTUATION_RE = /[.!?,:;'"…)}\]]$/
const WHITESPACE_RUN_RE = /\s+/g
/**
 * Flattens a DOM subtree into a list of paragraph strings.
 *
 * A paragraph is the whitespace-collapsed text of a "block-level leaf": an
 * element that is block-level itself and contains no block-level descendant.
 * Text nodes reached while descending through non-leaf elements are treated as
 * paragraphs of their own. Candidates of 20 characters or fewer are dropped.
 *
 * @param {Node} root - Node at which the traversal starts.
 * @returns {string[]} Paragraph texts in traversal (document) order.
 */
export function flattenToParagraphs(root: Node) {
  // Tags that always count as block-level, regardless of their computed style.
  const blockTagNames = new Set([
    "p",
    "article",
    "section",
    "figure",
    "figcaption",
    "blockquote",
    "pre",
    "ul",
    "ol",
    "li",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "div",
    "header",
    "footer",
    "main",
    "nav",
  ])

  const collected: string[] = []

  const isElement = (node: Node): node is Element => node.nodeType === Node.ELEMENT_NODE

  // An element is block-level when its tag is listed above or when its
  // computed `display` is "block" or "list-item". Non-elements never are.
  function isBlockLevel(node: Node): boolean {
    if (!isElement(node))
      return false
    if (blockTagNames.has(node.tagName.toLowerCase()))
      return true
    const { display } = window.getComputedStyle(node)
    return display === "block" || display === "list-item"
  }

  // True when any element below `node` (at any depth) is block-level.
  function hasBlockDescendant(node: Node): boolean {
    if (!isElement(node))
      return false
    return Array.from(node.children).some(
      child => isBlockLevel(child) || hasBlockDescendant(child),
    )
  }

  // Concatenates the text of all descendants. A single space is inserted
  // between two neighbouring pieces unless the text so far already ends with a
  // space or the incoming piece matches TRAILING_PUNCTUATION_RE.
  function getTextWithSpaces(element: Element): string {
    let joined = ""
    for (const child of element.childNodes) {
      let piece = ""
      if (child.nodeType === Node.TEXT_NODE)
        piece = child.textContent || ""
      else if (isElement(child))
        piece = getTextWithSpaces(child)

      const needsSeparator = joined.length > 0
        && !joined.endsWith(" ")
        && !TRAILING_PUNCTUATION_RE.test(piece)
      if (needsSeparator)
        joined += " "
      joined += piece
    }
    return joined
  }

  // Keeps only candidates longer than 20 characters (adjust the threshold here
  // if shorter paragraphs should be retained).
  function keepIfLongEnough(text: string | undefined) {
    if (text && text.length > 20)
      collected.push(text)
  }

  function walk(node: Node) {
    // A text node met during the descent is a stand-alone paragraph candidate.
    if (node.nodeType === Node.TEXT_NODE) {
      keepIfLongEnough(node.textContent?.replace(WHITESPACE_RUN_RE, " ").trim())
      return
    }

    // Comments, processing instructions and other non-content nodes are ignored.
    if (!isElement(node))
      return

    // Block-level leaf: emit its text as one paragraph and stop descending.
    if (isBlockLevel(node) && !hasBlockDescendant(node)) {
      keepIfLongEnough(getTextWithSpaces(node).replace(WHITESPACE_RUN_RE, " ").trim())
      return
    }

    // Anything else: keep descending through the children.
    for (const child of node.childNodes)
      walk(child)
  }

  walk(root)
  return collected
}
/**
 * Gathers SEO-related metadata from a document.
 *
 * Every string field falls back to "" when the element or attribute is
 * missing. `h1Tags` holds the trimmed text of each `<h1>`. `structuredData`
 * holds the parsed JSON of each `application/ld+json` script; a script that
 * fails to parse is logged and represented by `{}`.
 *
 * @param doc - Document to inspect.
 * @returns Plain object with the collected metadata.
 */
export function extractSeoInfo(doc: Document) {
  // Attribute of the first element matching `selector`, or "" when unavailable.
  const readAttribute = (selector: string, attribute: string): string =>
    doc.querySelector(selector)?.getAttribute(attribute) || ""

  const readContent = (selector: string): string => readAttribute(selector, "content")

  // Parses one JSON-LD script; malformed JSON is reported and replaced by {}.
  const parseJsonLd = (script: Element) => {
    try {
      return JSON.parse(script.textContent || "{}")
    }
    catch (e) {
      console.error("Error parsing structured data:", e)
      return {}
    }
  }

  return {
    title: doc.title || "",
    metaDescription: readContent("meta[name=\"description\"]"),
    metaKeywords: readContent("meta[name=\"keywords\"]"),
    canonicalUrl: readAttribute("link[rel=\"canonical\"]", "href"),
    ogTitle: readContent("meta[property=\"og:title\"]"),
    ogDescription: readContent("meta[property=\"og:description\"]"),
    ogImage: readContent("meta[property=\"og:image\"]"),
    twitterCard: readContent("meta[name=\"twitter:card\"]"),
    twitterTitle: readContent("meta[name=\"twitter:title\"]"),
    h1Tags: Array.from(doc.querySelectorAll("h1"), heading => heading.textContent?.trim() || ""),
    structuredData: Array.from(doc.querySelectorAll("script[type=\"application/ld+json\"]"), parseJsonLd),
  }
}

View file

@ -1,75 +0,0 @@
import type { ArticleExplanation, ArticleWord } from "@/types/content"
import type { DOWNLOAD_FILE_ITEMS } from "@/utils/constants/side"
import { saveAs } from "file-saver"
import { toast } from "sonner"
import { AST_TEMPLATE, MARKDOWN_TEMPLATE_TOKEN, PARAGRAPH_DEPTH, SENTENCE_TEMPLATE, WORDS_TEMPLATE } from "@/utils/constants/side"
export type DOWNLOAD_FILE_TYPES = keyof typeof DOWNLOAD_FILE_ITEMS
type ExplanationDataList = Array<ArticleExplanation["paragraphs"]>
type DOWNLOADER_MAP = Record<DOWNLOAD_FILE_TYPES, (explainDataList: ExplanationDataList, opts?: object) => void>
/**
 * Exports sentence-by-sentence explanation data as a downloadable file.
 *
 * Rendering works by substituting MARKDOWN_TEMPLATE_TOKEN placeholders in the
 * AST/SENTENCE/WORDS templates. All substitutions pass the value through a
 * replacer function: with a plain string replacement, `String.prototype.replace`
 * would interpret sequences such as `$&`, `$$`, `` $` `` and `$'` that may occur
 * in page or model text and silently corrupt the export.
 */
class Downloader {
  // `document.title` is always a string, so `??` could never reach the
  // fallback; `||` also covers the empty title (which produced ".md").
  title = document.title || "Untitled"

  // Maps each supported file type to the method that produces it. The methods
  // are stored unbound; `download` re-binds them with `.call(this, …)`.
  downloader: DOWNLOADER_MAP = {
    md: this.downloadMarkdown,
  }

  /**
   * Dispatches to the exporter registered for `fileType`.
   *
   * @param explainDataList - Nested explanation data to export.
   * @param fileType - Key of DOWNLOAD_FILE_ITEMS selecting the output format.
   * @param opts - Optional exporter-specific options, forwarded as-is.
   */
  download(explainDataList: ExplanationDataList, fileType: DOWNLOAD_FILE_TYPES, opts?: object) {
    this.downloader[fileType].call(this, explainDataList, opts)
  }

  /**
   * Renders the data as Markdown and triggers a download named `<title>.md`.
   * Failures are reported through a toast instead of being thrown.
   */
  downloadMarkdown(explainDataList: ExplanationDataList) {
    try {
      const article = this.markdownParser(explainDataList)
      const blob = new Blob([article], {
        type: "text/plain",
      })
      saveAs(blob, `${this.title}.md`)
    }
    catch (error) {
      if (error instanceof Error) {
        toast.error(error.message)
      }
      else {
        toast.error("Something went wrong when exporting...")
      }
    }
  }

  /**
   * Builds the complete Markdown document: title plus all rendered sentences.
   *
   * @returns The Markdown text.
   */
  markdownParser(explainDataList: ExplanationDataList = []) {
    const sentence = this.parseSentence(explainDataList)
    return AST_TEMPLATE
      .replace(MARKDOWN_TEMPLATE_TOKEN.title, () => this.title)
      .replace(MARKDOWN_TEMPLATE_TOKEN.sentence, () => sentence)
  }

  /**
   * Renders every sentence entry with SENTENCE_TEMPLATE and concatenates them.
   * Entries are numbered from 1 in flattened order.
   */
  parseSentence(explainDataList: ExplanationDataList = []) {
    const list = explainDataList.flat(PARAGRAPH_DEPTH)
    return list.reduce((sentence, paragraph, pIndex) => {
      const words = paragraph.words ?? []
      const renderedWords = this.parseWords(words)
      return sentence + SENTENCE_TEMPLATE
        .replace(MARKDOWN_TEMPLATE_TOKEN.originalSentence, () => paragraph.originalSentence)
        .replace(MARKDOWN_TEMPLATE_TOKEN.translatedSentence, () => paragraph.translatedSentence)
        .replace(MARKDOWN_TEMPLATE_TOKEN.words, () => renderedWords)
        .replace(MARKDOWN_TEMPLATE_TOKEN.explanation, () => paragraph.explanation)
        .replace(MARKDOWN_TEMPLATE_TOKEN.globalIndex, () => (pIndex + 1).toString())
    }, "")
  }

  /**
   * Renders the key-word list of one sentence with WORDS_TEMPLATE.
   * Words are numbered from 1.
   */
  parseWords(words: ArticleWord[]) {
    return words.reduce((text, word, wIndex) => {
      return text + WORDS_TEMPLATE
        .replace(MARKDOWN_TEMPLATE_TOKEN.wIndex, () => (wIndex + 1).toString())
        .replace(MARKDOWN_TEMPLATE_TOKEN.word, () => word.word)
        .replace(MARKDOWN_TEMPLATE_TOKEN.syntacticCategory, () => word.syntacticCategory)
        .replace(MARKDOWN_TEMPLATE_TOKEN.explanation, () => word.explanation)
    }, "")
  }
}
export default new Downloader()

View file

@ -1,72 +1,3 @@
import { langCodeISO6393Schema } from "@read-frog/definitions"
import { z } from "zod"
// Result of extracting readable content from a page: article-level metadata
// plus the list of paragraph strings.
// NOTE(review): the `article` fields look like a parsed-article result from a
// reader library — confirm against the producer before relying on it.
export interface ExtractedContent {
article: {
title?: string | null | undefined
byline?: string | null | undefined
dir?: string | null | undefined
content?: Node | null | undefined
textContent?: string | null | undefined
length?: number | null | undefined
excerpt?: string | null | undefined
siteName?: string | null | undefined
lang: string | null | undefined
}
paragraphs: string[]
}
// Schema of the article-level analysis. `detectedLang` is either a value
// accepted by langCodeISO6393Schema or the literal "und".
export const articleAnalysisSchema = z.object({
isArticle: z.boolean(),
detectedLang: langCodeISO6393Schema.or(z.literal("und")),
summary: z.string(),
introduction: z.string(),
terms: z.array(z.string()),
})
// Allowed part-of-speech abbreviations for a single word.
export const partOfSpeechAbbr = z.enum([
"n.", // noun
"pron.", // pronoun
"v.", // verb
"adj.", // adjective
"adv.", // adverb
"prep.", // preposition
"conj.", // conjunction
"interj.", // interjection
"det.", // determiner
"num.", // numeral
"part.", // particle
])
// Part-of-speech abbreviations plus "ph." (presumably "phrase" — confirm).
export const syntacticCategoryAbbr = partOfSpeechAbbr.or(z.enum(["ph."]))
// One explained word: the word itself, its category and an explanation.
export const articleWordSchema = z.object({
word: z.string(),
syntacticCategory: syntacticCategoryAbbr,
explanation: z.string(),
})
// Explanation payload: `paragraphs` is an array of arrays of sentence entries,
// each carrying the original sentence, its translation, explained words and a
// sentence-level explanation.
export const articleExplanationSchema = z.object({
paragraphs: z.array(
z.array(
z.object({
originalSentence: z.string(),
translatedSentence: z.string(),
words: z.array(
articleWordSchema,
),
explanation: z.string(),
}),
),
),
})
// Static types inferred from the schemas above.
export type ArticleWord = z.infer<typeof articleWordSchema>
export type ArticleAnalysis = z.infer<typeof articleAnalysisSchema>
export type ArticleExplanation = z.infer<typeof articleExplanationSchema>
export type SyntacticCategoryAbbr = z.infer<typeof syntacticCategoryAbbr>
export interface WebPageContext {
webTitle: string
webContent?: string

View file

@ -1,49 +1,2 @@
// Width limits for the side content panel, in pixels.
export const MIN_SIDE_CONTENT_WIDTH = 420 // px
export const DEFAULT_SIDE_CONTENT_WIDTH = 420 // px
// Supported export formats, keyed by file type, with their display labels.
export const DOWNLOAD_FILE_ITEMS = {
md: {
label: "Markdown",
},
}
// Depth argument for Array.prototype.flat when nested explanation data is
// flattened into a single list of sentence entries.
export const PARAGRAPH_DEPTH = 3
// Placeholder tokens embedded in the Markdown templates below; each one is
// substituted with real content when a document is rendered.
// Note: `title` has a space after the colon while the other tokens do not.
// This is harmless as long as the token is only referenced through this enum.
export enum MARKDOWN_TEMPLATE_TOKEN {
title = "{{ Read Frog: title }}",
sentence = "{{ Read Frog:sentence }}",
words = "{{ Read Frog:words }}",
explanation = "{{ Read Frog:explanation }}",
originalSentence = "{{ Read Frog:originalSentence }}",
translatedSentence = "{{ Read Frog:translatedSentence }}",
word = "{{ Read Frog:word }}",
syntacticCategory = "{{ Read Frog:syntacticCategory }}",
wIndex = "{{ Read Frog:wIndex }}",
globalIndex = "{{ Read Frog:globalIndex }}",
}
// Document skeleton: a level-1 title followed by all rendered sentences.
export const AST_TEMPLATE = `
# ${MARKDOWN_TEMPLATE_TOKEN.title}
${MARKDOWN_TEMPLATE_TOKEN.sentence}
`
// One sentence section: numbered heading, original and translated sentence,
// key-word list and explanation.
export const SENTENCE_TEMPLATE = `
## Sentence ${MARKDOWN_TEMPLATE_TOKEN.globalIndex}
**${MARKDOWN_TEMPLATE_TOKEN.originalSentence}**
${MARKDOWN_TEMPLATE_TOKEN.translatedSentence}
### Key Words
${MARKDOWN_TEMPLATE_TOKEN.words}
### Explanation
${MARKDOWN_TEMPLATE_TOKEN.explanation}
`
// One numbered key-word entry: word, syntactic category, then its explanation.
export const WORDS_TEMPLATE = `${MARKDOWN_TEMPLATE_TOKEN.wIndex}. **${MARKDOWN_TEMPLATE_TOKEN.word}** ${MARKDOWN_TEMPLATE_TOKEN.syntacticCategory}
${MARKDOWN_TEMPLATE_TOKEN.explanation}
`

View file

@ -0,0 +1,98 @@
// @vitest-environment jsdom
import { beforeEach, describe, expect, it, vi } from "vitest"
import { detectPageLanguageLightweight, PAGE_LANGUAGE_TEXT_SAMPLE_LIMIT } from "../page-language"
const { mockDetectLanguageWithSource } = vi.hoisted(() => ({
mockDetectLanguageWithSource: vi.fn(),
}))
vi.mock("../language", () => ({
detectLanguageWithSource: mockDetectLanguageWithSource,
}))
// Tests for detectPageLanguageLightweight. The text-based detector
// (detectLanguageWithSource) is mocked, so the suite only checks which inputs
// reach it and how its result is passed through.
describe("detectPageLanguageLightweight", () => {
beforeEach(() => {
// Default detector answer; individual tests override it where needed.
mockDetectLanguageWithSource.mockReset()
mockDetectLanguageWithSource.mockResolvedValue({ code: "eng", source: "franc" })
// Start every test from a document without language hints or content.
document.documentElement.removeAttribute("lang")
document.head.innerHTML = ""
document.title = ""
document.body.innerHTML = ""
})
it("uses html lang metadata without invoking text language detection", async () => {
document.documentElement.lang = "ja-JP"
document.body.textContent = "This body should not be needed."
const result = await detectPageLanguageLightweight()
expect(result).toEqual({
detectedCodeOrUnd: "jpn",
detectionSource: "metadata",
})
expect(mockDetectLanguageWithSource).not.toHaveBeenCalled()
})
it("uses page language meta tags before sampling body text", async () => {
// og:locale uses an underscore separator and a Traditional-Chinese region.
document.head.innerHTML = `<meta property="og:locale" content="zh_TW">`
document.body.textContent = "This body should not be needed."
const result = await detectPageLanguageLightweight()
expect(result).toEqual({
detectedCodeOrUnd: "cmn-Hant",
detectionSource: "metadata",
})
expect(mockDetectLanguageWithSource).not.toHaveBeenCalled()
})
it("falls back to local text detection with a bounded title and body sample", async () => {
document.title = "A useful article title"
// The paragraph is far longer than the sample limit; the script text must
// never reach the detector.
document.body.innerHTML = `
<main>
<p>${"English body text. ".repeat(300)}</p>
<script>const hidden = "ignored"</script>
</main>
`
const result = await detectPageLanguageLightweight()
expect(result).toEqual({
detectedCodeOrUnd: "eng",
detectionSource: "franc",
})
expect(mockDetectLanguageWithSource).toHaveBeenCalledTimes(1)
expect(mockDetectLanguageWithSource).toHaveBeenCalledWith(
expect.stringContaining("A useful article title"),
{ enableLLM: false },
)
const [textForDetection] = mockDetectLanguageWithSource.mock.calls[0]
expect(textForDetection).not.toContain("hidden")
// Upper bound: body sample limit + title + the two-character "\n\n" joiner.
expect(textForDetection.length).toBeLessThanOrEqual(PAGE_LANGUAGE_TEXT_SAMPLE_LIMIT + document.title.length + 2)
})
it("does not clone the document or read computed styles during initial detection", async () => {
document.body.textContent = "English body text. ".repeat(20)
// Guards the startup cost: neither a document clone nor a style lookup
// may happen on this path.
const cloneSpy = vi.spyOn(document, "cloneNode")
const getComputedStyleSpy = vi.spyOn(window, "getComputedStyle")
await detectPageLanguageLightweight()
expect(cloneSpy).not.toHaveBeenCalled()
expect(getComputedStyleSpy).not.toHaveBeenCalled()
})
it("returns und when local text detection cannot identify the page language", async () => {
mockDetectLanguageWithSource.mockResolvedValueOnce({ code: "und", source: "fallback" })
document.body.textContent = "hi"
const result = await detectPageLanguageLightweight()
expect(result).toEqual({
detectedCodeOrUnd: "und",
detectionSource: "fallback",
})
})
})

View file

@ -1,57 +0,0 @@
import type { LangCodeISO6393 } from "@read-frog/definitions"
import type { DetectionSource } from "@/utils/content/language"
import { Readability } from "@mozilla/readability"
import { flattenToParagraphs } from "@/entrypoints/side.content/utils/article"
import { detectLanguageWithSource } from "@/utils/content/language"
import { getLocalConfig } from "../config/storage"
import { logger } from "../logger"
import { removeDummyNodes } from "./utils"
export type { DetectionSource } from "@/utils/content/language"
/**
 * Parses the current page with Readability and detects its language.
 *
 * Works on a deep clone of `document` from which the nodes selected by
 * `removeDummyNodes` have been removed. The detection text is the article
 * title and text content joined by a blank line.
 *
 * @returns The parsed article (as returned by `Readability#parse`), its
 *   paragraphs (empty when no article content was found), the detected
 *   language code or "und", and the source that produced the detection.
 */
export async function getDocumentInfo(): Promise<{
  article: ReturnType<Readability<Node>["parse"]>
  paragraphs: string[]
  detectedCodeOrUnd: LangCodeISO6393 | "und"
  detectionSource: DetectionSource
}> {
  const clonedDocument = document.cloneNode(true) as Document
  await removeDummyNodes(clonedDocument)

  // The identity serializer keeps `article.content` as a DOM node, which is
  // what flattenToParagraphs consumes.
  const reader = new Readability(clonedDocument, {
    serializer: el => el,
  })
  const article = reader.parse()

  let paragraphs: string[] = []
  if (article?.content)
    paragraphs = flattenToParagraphs(article.content)

  logger.info("article", article)

  const config = await getLocalConfig()
  const pageSettings = config?.translate.page

  // LLM detection is only worth its cost when something depends on the result,
  // i.e. the user configured auto-translate or skip languages.
  const autoTranslateCount = pageSettings?.autoTranslateLanguages?.length ?? 0
  const skipCount = pageSettings?.skipLanguages?.length ?? 0
  const hasAutoTranslateOrSkip = autoTranslateCount > 0 || skipCount > 0
  const enableLLM = config?.languageDetection.mode === "llm" && hasAutoTranslateOrSkip

  const textForDetection = `${article?.title || ""}\n\n${article?.textContent || ""}`
  const detection = await detectLanguageWithSource(textForDetection, {
    enableLLM,
    maxLengthForLLM: 1500,
  })
  const detectedCodeOrUnd = detection.code
  const detectionSource = detection.source

  logger.info("final detectionSource", detectionSource)
  logger.info("final detectedCodeOrUnd", detectedCodeOrUnd)

  return {
    article,
    paragraphs,
    detectedCodeOrUnd,
    detectionSource,
  }
}

View file

@ -0,0 +1,210 @@
import type { LangCodeISO6391, LangCodeISO6393 } from "@read-frog/definitions"
import type { DetectionSource } from "./language"
import {
ISO6393_TO_6391,
LANG_CODE_ISO6393_OPTIONS,
LOCALE_TO_ISO6393,
} from "@read-frog/definitions"
import { detectLanguageWithSource } from "./language"
// Maximum number of characters of body text sampled for language detection.
export const PAGE_LANGUAGE_TEXT_SAMPLE_LIMIT = 3000
// Numeric values handed to createTreeWalker and returned from its filter; they
// equal NodeFilter.SHOW_TEXT, FILTER_ACCEPT and FILTER_REJECT in the DOM spec.
// NOTE(review): presumably inlined so the module does not depend on a global
// NodeFilter — confirm.
const SHOW_TEXT = 4
const FILTER_ACCEPT = 1
const FILTER_REJECT = 2
// Lower-cased meta keys (read from http-equiv / name / property / itemprop)
// whose `content` is treated as a page-language hint.
const LANGUAGE_META_KEYS = new Set([
"content-language",
"dc.language",
"dcterms.language",
"inlanguage",
"language",
"og:locale",
])
// Text nodes whose direct parent has one of these tag names are not sampled.
// NOTE(review): `tagName` is upper-case only for HTML-namespace elements; SVG
// elements report "svg", so the "SVG" entry may never match — confirm.
const SKIPPED_TEXT_PARENT_TAGS = new Set([
"SCRIPT",
"STYLE",
"NOSCRIPT",
"IFRAME",
"SVG",
])
// Region subtags that make a `zh` tag resolve to "cmn-Hant".
const TRADITIONAL_CHINESE_REGIONS = new Set(["hk", "mo", "tw"])
// Case-insensitive lookup from a supported ISO 639-3 code to its canonical form.
const ISO6393_BY_LOWERCASE = new Map(
LANG_CODE_ISO6393_OPTIONS.map(code => [code.toLowerCase(), code] as const),
)
// Lookup from lower-cased ISO 639-1 codes / locale keys to ISO 639-3 codes.
const ISO6391_TO_ISO6393 = createISO6391ToISO6393Map()
// Detection sources of the text detector plus "metadata" for results that come
// from the lang attribute or meta tags.
export type PageLanguageDetectionSource = DetectionSource | "metadata"
// Result of page language detection: the code (or "und") and where it came from.
export interface PageLanguageDetectionResult {
detectedCodeOrUnd: LangCodeISO6393 | "und"
detectionSource: PageLanguageDetectionSource
}
/**
 * Builds the lookup from lower-cased ISO 639-1 codes / locale keys to
 * ISO 639-3 codes.
 *
 * Entries derived by inverting ISO6393_TO_6391 are added first and the first
 * ISO 639-3 code seen for a given ISO 639-1 code wins. Entries from
 * LOCALE_TO_ISO6393 are applied afterwards and override anything already
 * present for the same key.
 */
function createISO6391ToISO6393Map() {
  const lookup = new Map<string, LangCodeISO6393>()

  const invertedEntries = Object.entries(ISO6393_TO_6391) as Array<[LangCodeISO6393, LangCodeISO6391 | undefined]>
  invertedEntries.forEach(([iso6393, iso6391]) => {
    if (!iso6391)
      return
    const key = iso6391.toLowerCase()
    if (!lookup.has(key))
      lookup.set(key, iso6393)
  })

  const localeEntries = Object.entries(LOCALE_TO_ISO6393) as Array<[LangCodeISO6391, LangCodeISO6393 | undefined]>
  localeEntries.forEach(([locale, iso6393]) => {
    if (iso6393)
      lookup.set(locale.toLowerCase(), iso6393)
  })

  return lookup
}
/**
 * Resolves a single language token (e.g. "ja-JP", "zh_TW", "en_US.UTF-8",
 * "deu", "yue-HK") to a supported ISO 639-3 code.
 *
 * Resolution order:
 * 1. the whole token is a supported ISO 639-3 code;
 * 2. `zh` tags: "yue" subtag → "yue"; "hant" subtag or a Traditional-Chinese
 *    region → "cmn-Hant"; otherwise "cmn";
 * 3. the whole token is a known ISO 639-1 code / locale key;
 * 4. the primary subtag is a known ISO 639-1 code / locale key;
 * 5. the primary subtag is itself a supported ISO 639-3 code. Language tags may
 *    use a three-letter primary subtag followed by a region or script
 *    ("yue-HK", "fil-PH"); without this step such tags stayed unresolved even
 *    though the bare code was supported.
 *
 * @param token - Raw token; underscores, a trailing ".encoding" suffix and
 *   surrounding whitespace are normalised away first.
 * @returns The resolved code, or null when the token is not recognised.
 */
function resolveLanguageToken(token: string): LangCodeISO6393 | null {
  const normalizedToken = token
    .replace(/_/g, "-")
    .replace(/\..*$/, "")
    .trim()
  if (!normalizedToken)
    return null

  const lowercaseToken = normalizedToken.toLowerCase()
  const exactISO6393 = ISO6393_BY_LOWERCASE.get(lowercaseToken)
  if (exactISO6393)
    return exactISO6393

  const parts = lowercaseToken.split("-").filter(Boolean)
  if (parts[0] === "zh") {
    if (parts.includes("yue"))
      return "yue"
    if (parts.includes("hant") || parts.some(part => TRADITIONAL_CHINESE_REGIONS.has(part)))
      return "cmn-Hant"
    return "cmn"
  }

  const exactLocale = ISO6391_TO_ISO6393.get(lowercaseToken)
  if (exactLocale)
    return exactLocale

  const primaryLanguage = parts[0]
  if (!primaryLanguage)
    return null

  return ISO6391_TO_ISO6393.get(primaryLanguage)
    ?? ISO6393_BY_LOWERCASE.get(primaryLanguage)
    ?? null
}
/**
 * Resolves a locale / language header-style value to an ISO 639-3 code.
 *
 * The value is split on "," and ";" and the tokens are tried in order; the
 * first one that resolves wins.
 *
 * @param value - Raw value such as "en-US", "zh_TW" or "de, en;q=0.8".
 * @returns The first resolved code, or null for empty input or when no token
 *   resolves.
 */
export function resolveLanguageCodeFromLocale(value: string | null | undefined): LangCodeISO6393 | null {
  const tokens = value ? value.split(/[,;]/) : []

  for (const candidate of tokens) {
    const resolved = resolveLanguageToken(candidate)
    if (resolved)
      return resolved
  }

  return null
}
/**
 * Collects raw language hints declared by the document, in priority order:
 * the root element's `lang` attribute first, then the `content` of every
 * `<meta>` whose http-equiv / name / property / itemprop (trimmed,
 * lower-cased) is one of LANGUAGE_META_KEYS.
 *
 * @param doc - Document to inspect.
 * @returns Non-empty hint strings; values are not validated here.
 */
function getMetaLanguageCandidates(doc: Document): string[] {
  const keyAttributes = ["http-equiv", "name", "property", "itemprop"]

  const htmlLang = doc.documentElement?.getAttribute("lang")
  const hints: string[] = htmlLang ? [htmlLang] : []

  doc.querySelectorAll("meta").forEach((meta) => {
    const declaresLanguage = keyAttributes.some((attribute) => {
      const key = meta.getAttribute(attribute)?.trim().toLowerCase()
      return key !== undefined && LANGUAGE_META_KEYS.has(key)
    })
    if (!declaresLanguage)
      return

    const content = meta.getAttribute("content")
    if (content)
      hints.push(content)
  })

  return hints
}
/**
 * Collapses every run of whitespace to a single space and removes leading and
 * trailing whitespace.
 */
function normalizeTextSample(text: string): string {
  const words = text.split(/\s+/).filter(word => word.length > 0)
  return words.join(" ")
}
/**
 * Returns the parent of `node` when that parent is an element, otherwise null
 * (no parent, or a parent of another node type).
 */
function getTextParentElement(node: Node): Element | null {
  const parent = node.parentNode
  if (parent == null || parent.nodeType !== Node.ELEMENT_NODE)
    return null
  return parent as Element
}
/**
 * Collects up to `maxLength` characters of visible-ish text below `root` for
 * language detection, without cloning the tree or reading computed styles.
 *
 * Text nodes are visited in document order, whitespace-normalised and joined
 * with single spaces. Text is skipped when its parent is one of
 * SKIPPED_TEXT_PARENT_TAGS or when it lives anywhere inside an `<svg>`.
 *
 * @param root - Subtree to sample; null/undefined yields "".
 * @param maxLength - Maximum length of the returned sample; values <= 0 yield "".
 * @returns The sample, at most `maxLength` characters long.
 */
function collectPageTextSample(root: Node | null | undefined, maxLength = PAGE_LANGUAGE_TEXT_SAMPLE_LIMIT): string {
  if (!root || maxLength <= 0)
    return ""

  const doc = root.nodeType === Node.DOCUMENT_NODE
    ? root as Document
    : root.ownerDocument

  // Without a TreeWalker, fall back to the (unfiltered) textContent.
  if (!doc?.createTreeWalker)
    return normalizeTextSample(root.textContent ?? "").slice(0, maxLength)

  // `tagName` is upper-case only for HTML-namespace elements: SVG elements
  // report "svg" / "text" / "title", and SVG text sits in descendants of
  // <svg> rather than directly under it. Comparing the raw tagName of the
  // direct parent therefore never excluded SVG text, so the tag is upper-cased
  // and SVG is matched through the ancestor chain.
  const isSkippedTextParent = (element: Element): boolean =>
    SKIPPED_TEXT_PARENT_TAGS.has(element.tagName.toUpperCase())
    || element.closest("svg") !== null

  const walker = doc.createTreeWalker(root, SHOW_TEXT, {
    acceptNode(node) {
      const parentElement = getTextParentElement(node)
      if (!parentElement || isSkippedTextParent(parentElement))
        return FILTER_REJECT
      // Whitespace-only nodes contribute nothing to the sample.
      return normalizeTextSample(node.textContent ?? "") ? FILTER_ACCEPT : FILTER_REJECT
    },
  })

  let sample = ""
  let currentNode = walker.nextNode()
  while (currentNode && sample.length < maxLength) {
    const text = normalizeTextSample(currentNode.textContent ?? "")
    if (text) {
      const separator = sample ? " " : ""
      // Budget left after the separator; the last piece is truncated to fit.
      const remainingLength = maxLength - sample.length - separator.length
      if (remainingLength <= 0)
        break
      sample += `${separator}${text.slice(0, remainingLength)}`
    }
    currentNode = walker.nextNode()
  }
  return sample
}
/**
 * Detects the page language cheaply.
 *
 * Declared metadata (root `lang` attribute, then language meta tags) is tried
 * first; the first hint that resolves is returned with source "metadata".
 * Otherwise the title and a bounded body text sample are joined by a blank
 * line and passed to the text detector with LLM detection disabled.
 *
 * @param doc - Document to inspect; defaults to the global `document`.
 * @returns The detected code (or "und") and the source of the detection.
 */
export async function detectPageLanguageLightweight(doc: Document = document): Promise<PageLanguageDetectionResult> {
  let metadataCode: LangCodeISO6393 | null = null
  for (const hint of getMetaLanguageCandidates(doc)) {
    metadataCode = resolveLanguageCodeFromLocale(hint)
    if (metadataCode)
      break
  }

  if (metadataCode) {
    return {
      detectedCodeOrUnd: metadataCode,
      detectionSource: "metadata",
    }
  }

  // Empty parts are dropped so a missing title or body adds no separator.
  const sampleParts = [doc.title, collectPageTextSample(doc.body)]
  const textForDetection = sampleParts.filter(part => Boolean(part)).join("\n\n")

  const detection = await detectLanguageWithSource(textForDetection, {
    enableLLM: false,
  })

  return {
    detectedCodeOrUnd: detection.code,
    detectionSource: detection.source,
  }
}

View file

@ -1,23 +1,8 @@
import { getLocalConfig } from "../config/storage"
import { DEFAULT_CONFIG } from "../constants/config"
import { isDontWalkIntoAndDontTranslateAsChildElement, isHTMLElement } from "../host/dom/filter"
// Length cap of 3000 — presumably characters of page text; its use is not visible in this chunk (TODO confirm).
export const MAX_TEXT_LENGTH = 3000
// Matches zero-width characters U+200B–U+200D and U+FEFF (global flag).
const ZERO_WIDTH_CHARS_RE = /[\u200B-\u200D\uFEFF]/g
// Matches runs of one or more whitespace characters (global flag).
const WHITESPACE_RUN_RE = /\s+/g
/**
 * Removes from `root` every HTML element that
 * `isDontWalkIntoAndDontTranslateAsChildElement` flags for the active config.
 *
 * The element list is captured before the config is awaited, and the stored
 * local config falls back to `DEFAULT_CONFIG` when it is null or undefined.
 * The document is mutated in place.
 *
 * @param root - Document whose matching elements are removed.
 */
export async function removeDummyNodes(root: Document) {
  // Snapshot first so the set of candidates is fixed before awaiting the config.
  const candidates = root.querySelectorAll("*")
  const settings = await getLocalConfig() ?? DEFAULT_CONFIG

  for (const candidate of Array.from(candidates)) {
    if (!isHTMLElement(candidate))
      continue
    if (isDontWalkIntoAndDontTranslateAsChildElement(candidate, settings))
      candidate.remove()
  }
}
/**
* Clean and truncate article text for post processing
*/

View file

@ -2,18 +2,10 @@
import { beforeEach, describe, expect, it, vi } from "vitest"
const mockParse = vi.fn()
const mockRemoveDummyNodes = vi.fn()
const mockWarn = vi.fn()
vi.mock("@mozilla/readability", () => ({
Readability: vi.fn().mockImplementation(() => ({
parse: mockParse,
})),
}))
vi.mock("@/utils/content/utils", () => ({
removeDummyNodes: mockRemoveDummyNodes,
const { mockDefuddleConstructor, mockParse, mockWarn } = vi.hoisted(() => ({
mockDefuddleConstructor: vi.fn(),
mockParse: vi.fn(),
mockWarn: vi.fn(),
}))
vi.mock("@/utils/logger", () => ({
@ -24,17 +16,28 @@ vi.mock("@/utils/logger", () => ({
/**
 * Imports `../webpage-context` after resetting the module registry and
 * registering a mock for `defuddle`.
 *
 * The mock's default export is a class that forwards its constructor arguments
 * to `mockDefuddleConstructor` and whose `parse()` returns `mockParse()`.
 *
 * @returns The freshly imported module namespace.
 */
async function loadModule() {
  vi.resetModules()

  class MockDefuddle {
    constructor(...args: unknown[]) {
      mockDefuddleConstructor(...args)
    }

    parse() {
      return mockParse()
    }
  }

  vi.doMock("defuddle", () => ({ __esModule: true, default: MockDefuddle }))

  return await import("../webpage-context")
}
describe("getOrCreateWebPageContext", () => {
beforeEach(() => {
mockDefuddleConstructor.mockReset()
mockParse.mockReset()
mockRemoveDummyNodes.mockReset()
mockWarn.mockReset()
mockParse.mockReturnValue({ textContent: "Readable page body" })
mockRemoveDummyNodes.mockResolvedValue(undefined)
mockParse.mockReturnValue({ contentMarkdown: "# Readable page body" })
document.title = "Original Title"
document.body.innerHTML = "<main>Page body</main>"
@ -50,12 +53,26 @@ describe("getOrCreateWebPageContext", () => {
const second = await getOrCreateWebPageContext()
expect(first?.webTitle).toBe("Original Title")
expect(first?.webContent).toBeTruthy()
expect(first?.webContent).toBe("# Readable page body")
expect(second).toEqual({
url: first?.url,
webTitle: "Original Title",
webContent: first?.webContent,
})
expect(mockDefuddleConstructor).toHaveBeenCalledTimes(1)
})
it("parses webpage content as markdown with Defuddle", async () => {
const { getOrCreateWebPageContext } = await loadModule()
const result = await getOrCreateWebPageContext()
expect(result?.webContent).toBe("# Readable page body")
expect(mockDefuddleConstructor).toHaveBeenCalledWith(document, {
markdown: true,
url: window.location.href,
useAsync: false,
})
})
it("refreshes the cached title and content after the URL changes", async () => {
@ -65,7 +82,7 @@ describe("getOrCreateWebPageContext", () => {
document.title = "Next Article Title"
document.body.innerHTML = "<main>Next article body</main>"
mockParse.mockReturnValueOnce({ textContent: "Next readable page body" })
mockParse.mockReturnValueOnce({ contentMarkdown: "## Next readable page body" })
window.history.replaceState({}, "", "/article-2")
const second = await getOrCreateWebPageContext()
@ -81,11 +98,27 @@ describe("getOrCreateWebPageContext", () => {
const longContent = "x".repeat(2100)
document.body.innerHTML = `<main>${longContent}</main>`
mockParse.mockReturnValueOnce({ textContent: longContent })
mockParse.mockReturnValueOnce({ contentMarkdown: longContent })
const result = await getOrCreateWebPageContext()
expect(result?.webContent).toHaveLength(2000)
expect(result?.webContent).toBe(longContent.slice(0, 2000))
})
it("falls back to body text when Defuddle parsing fails", async () => {
mockParse.mockImplementationOnce(() => {
throw new Error("parse failed")
})
document.body.innerHTML = "<main>Fallback body text</main>"
const { getOrCreateWebPageContext } = await loadModule()
const result = await getOrCreateWebPageContext()
expect(result?.webContent).toBe("Fallback body text")
expect(mockWarn).toHaveBeenCalledWith(
"Defuddle parsing failed, falling back to body text:",
expect.any(Error),
)
})
})

View file

@ -1,6 +1,4 @@
import type { WebPageContext } from "@/types/content"
import { Readability } from "@mozilla/readability"
import { removeDummyNodes } from "@/utils/content/utils"
import { logger } from "@/utils/logger"
import { truncateWebPageContent } from "./webpage-content"
@ -13,14 +11,19 @@ let cachedWebPageContext: CachedWebPageContext | null = null
/**
 * Extracts text content for the current page, falling back to the body's
 * `textContent` (or an empty string) when nothing usable is produced or an
 * error is thrown.
 *
 * NOTE(review): this span appears to interleave the pre-change (Readability)
 * and post-change (Defuddle) lines of a diff — both extraction paths and both
 * `logger.warn` calls are present. Confirm which lines belong to the current file.
 *
 * @returns The extracted content, or the body text fallback.
 */
async function extractWebpageContent(): Promise<string> {
try {
// Readability path: runs on a deep clone of the document with dummy nodes removed.
const documentClone = document.cloneNode(true) as Document
await removeDummyNodes(documentClone)
const article = new Readability(documentClone, { serializer: el => el }).parse()
if (article?.textContent)
return article.textContent
// Defuddle path: loaded via dynamic import and run against the live document.
const { default: Defuddle } = await import("defuddle")
const result = new Defuddle(document, {
markdown: true,
url: window.location.href,
useAsync: false,
}).parse()
// Prefer the markdown output; fall back to `content` when it is empty.
const markdownContent = result.contentMarkdown || result.content
if (markdownContent)
return markdownContent
}
catch (error) {
// Errors are logged as warnings and execution continues to the body-text fallback below.
logger.warn("Readability parsing failed, falling back to body textContent:", error)
logger.warn("Defuddle parsing failed, falling back to body text:", error)
}
return document.body?.textContent || ""
}

View file

@ -1,142 +0,0 @@
import { LANG_CODE_TO_EN_NAME } from "@read-frog/definitions"
/**
 * Builds the prompt text for the article-analysis request.
 *
 * The prompt describes an input JSON object (`originalTitle`, `content`) and
 * asks for a JSON reply with `isArticle`, `detectedLang`, `summary`,
 * `introduction` and `terms`. `LANG_CODE_TO_EN_NAME` is serialized into the
 * prompt as the allowed language-code subset, with "und" requested for
 * languages outside it. Four worked examples are embedded.
 *
 * NOTE(review): the prompt contains typos ("an language teacher", "should
 * following") and some example strings look like they lost punctuation
 * (e.g. "democracies18", "sons contrary", "Lets explore"). They are left
 * untouched because the text is runtime behavior — confirm against upstream.
 *
 * @param targetLang - Name of the student's language, interpolated into the prompt.
 * @returns The complete prompt string.
 */
export function getAnalyzePrompt(targetLang: string) {
return `# Identity
You are an language teacher who explains things vividly.
Your student speaks target language: ${targetLang}
# Variables
- targetLang: ${targetLang}
# Instruction
You will be given a JSON object.
\`\`\`
{
"originalTitle": string | undefined | null,
"content": string
}
\`\`\`
You should analyze the content:
1. Determine if the content is an article or part of a long article or book (true or false). If the content appears inconsistent and not like from one coherent piece, return false.
2. If the answer to step 1 is true:
- Identify the main point of the article and exclude irrelevant content.
- Detect the language of the relevant content and assign it to "detectedLang".
- Summarize the relevant text into a short summary in content's language.
- Provide an introduction in ${targetLang} before explaining specific parts of the content. Include necessary background information and a very short summary to engage the student.
- List specialized terminology involved in the content.
3. If the answer to step 1 is false, only determine the language of the content and return an empty string "" for the other corresponding string fields.
# Output Format
Your response should following the JSON format:
\`\`\`json
{
"isArticle": boolean,
"detectedLang": string, // ISO 639-3 language code subset
"summary": string, // in "detectedLang"
"introduction": string, // in language ${targetLang} for your student
"terms": string[] // in "detectedLang"
}
\`\`\`
ISO 639-3 language code subset to English name, key is code, value is English name of the language:
${JSON.stringify(LANG_CODE_TO_EN_NAME)}
If the language is not in the subset, return "und" for "detectedLang".
# Examples
<example>
Variables:
- targetLang: Simplified Mandarin Chinese
Input:
{
"originalTitle": "Why British MPs should vote for assisted dying",
"content": "This newspaper believes in the liberal principle that people should have the right to choose the manner of their own death. So do two-thirds of Britons, who for decades have been in favour of assisted dying for those enduring unbearable suffering. And so do the citizens of many other democracies18 jurisdictions have passed laws in the past decade.
Despite this, Westminster MPs look as if they could vote down a bill on November 29th that would introduce assisted dying into England and Wales. They would be squandering a rare chance to enrich people's fundamental liberties.",
}
Output:
{
"isArticle": true,
"detectedLang": "eng",
"summary": "The piece contends that although a large majority of Britons—and many other democracies—support assisted dying, Members of Parliament in Westminster appear ready to reject a November 29th bill that would legalise it in England and Wales, thereby forfeiting an opportunity to broaden personal liberties.",
"introduction": "这段文字摘自一篇讨论""assisted dying立法的社论。英国公众长期支持这种做法并指出全球已有多地通过相关法律随后英国议会让英格兰和威尔士合法化协助死亡的提案。我们一起来通过这篇文章了解——个人自主权与立法进程之间的张力吧。",
"terms": [
"assisted dying",
"liberal principle",
"Westminster MPs",
"bill",
"fundamental liberties",
"jurisdictions"
]
}
</example>
<example>
Variables:
- targetLang: English
Input:
{
"originalTitle": "ゆいごん",
"content": "親父(おやじ)が病気になり,もう死ぬという時に,息子を呼んで,「もはや,わしも,この世(よ)におさらばじゃ。いっておくが,わしが死んでも,必ず(かならず)、葬式(そうしき)などはするなよ。こもに包んで(つつんで),川(かわ)へながせ。」と,心にもないことを言いました。実は,この親父,前前(まえまえ)から,息子のへそまがりぶりを知っていましたから,遺言(ゆいごん)は,反対(はんたい)の事を言っておけば,立派(りっぱ)な葬式をするだろうと思ったのです。ところが,親父の遺言をじっと聞いていた息子,「安心してください。これまで,親(おや)のいうことは,何一つ(なにひとつ),聞かなかったから,せめて,一生(いっしょう)に一度ぐらいは,言われた通りにしましょう。」"
}
Output:
{
"isArticle": true,
"detectedLang": "jpn",
"summary": "死期が近い父親は、へそ曲がりの息子が逆を行動すると踏んで「葬式はするな、むしろ体を包んで川へ流せ」と遺言した。ところが息子は「今まで一度も親の言うことを聞かなかったから、せめて最期だけは従う」と宣言し、父の思惑を裏切った皮肉な逸話である。",
"introduction": "This short Japanese anecdote hinges on dramatic irony. A dying father, well aware of his sons contrary nature, tries a bit of reverse psychology: he expressly forbids a funeral, expecting the son will do the opposite and honor him properly. To his surprise, the son resolves to obey for the first—and only—time. Lets explore how the story uses language and cultural cues to deliver its punch line.",
"terms": [
"遺言",
"葬式",
"へそまがり",
"包む",
"川へながす"
]
}
</example>
<example>
Variables:
- targetLang: Russian
Input:
{
"originalTitle": "The Economist | Independent journalism",
"content": "Warren Buffett said he plans to retire from his investment firm, Berkshire Hathaway, at the end of the year...Adolf Hitlers ignominious death proves the self-defeating, destructive nature of dictatorship, writes Richard Evans.Your AI meeting notes are ready."
}
Output:
{
"isArticle": false,
"detectedLang": "eng",
"summary": "",
"introduction": "",
"terms": []
}
</example>
<example>
Variables:
- targetLang: English
Input:
{
"originalTitle": "Περὶ φύσεως",
"content": "Ἀρχὴ πολιτείας ἀνδρῶν ἀγαθῶν παιδεία· γένοιτο δ᾽ ἂν εἰς εὐδαιμονίαν ἀρετῆς μέτοχος."
}
Output:
{
"isArticle": true,
"detectedLang": "und",
"summary": "",
"introduction": "",
"terms": []
}
</example>
Please return the response as JSON format directly.
`
}

View file

@ -1,336 +0,0 @@
import type { LangLevel } from "@read-frog/definitions"
import { syntacticCategoryAbbr } from "@/types/content"
/**
 * Builds the prompt text for the sentence-by-sentence explanation request.
 *
 * The prompt describes an input JSON object (`overallSummary`, `paragraphs`)
 * and asks for a JSON reply whose `paragraphs` field is a nested array
 * (paragraph → sentence) of objects with `originalSentence`,
 * `translatedSentence`, `words` and `explanation`.
 * `syntacticCategoryAbbr.options` is serialized into the prompt as the list of
 * allowed syntactic categories. Two worked examples are embedded.
 *
 * NOTE(review): some example strings look like they lost punctuation during
 * extraction (e.g. "来自 bear承受的否定形式", a lone "’" after "主语"), and the
 * instruction text has grammar slips ("If your student is Chinese learn
 * Japanese"). They are left untouched because the text is runtime behavior —
 * confirm against upstream.
 *
 * @param sourceLang - Name of the language being taught, interpolated into the prompt.
 * @param targetLang - Name of the student's language, interpolated into the prompt.
 * @param langLevel - The student's language level, interpolated into the prompt.
 * @returns The complete prompt string.
 */
export function getExplainPrompt(sourceLang: string, targetLang: string, langLevel: LangLevel) {
return `# Identity
You are an ${sourceLang} teacher who explains things vividly. Your student speaks ${targetLang}. Your student's language level is ${langLevel}.
# Variables
- sourceLang: ${sourceLang}
- targetLang: ${targetLang}
- langLevel: ${langLevel}
# Instruction
You will be given a JSON object.
\`\`\`
{
"overallSummary": string,
"paragraphs": string[],
}
\`\`\`
For each paragraph, you should:
1. Determine if the paragraph is related to the overall summary.
2. If yes, perform the following:
- Split the paragraph into sentences.
- For each sentence:
a) Fix any orthographic or typographic errors.
b) Translate the sentence into ${targetLang}.
c) Select difficult or interesting words, phrases, or technical terms suitable for your student's source language level (${langLevel}). Explain their part of speech and contextual understanding. For higher language level, you should not select too basic words. For lower language level, you should explain more basic words or phrases.
d) Explain the sentence to your student based on their language level (${langLevel}). Translate the sentence first, then explain the words, phrases, and whole sentences vividly. Provide analysis of grammar if the sentence is complex. Provide context, examples, or reference classical texts if beneficial.
3. If no, exclude the paragraph from your response.
If your student is Chinese learn Japanese, some words have similar shape and meaning, you can ignore and not explain them. For example, you don't need to explain "親父" because it's similar to "父亲".
# Output format
Your response should be the JSON format:
\`\`\`
{
"paragraphs": {
"originalSentence": string, // fixed version of the original sentence
"translatedSentence": string, // use language ${targetLang}
"words": {
"word": string,
"syntacticCategory": string, // select from the syntacticCategoryAbbr list below
"explanation": string, // explain the word use language ${targetLang}
}[], // words, phrases, technical terms, select less words for higher langLevel, If your student is Chinese learn Japanese, don't select words have similar shape and meaning in Chinese and Japanese. For example, you don't need to explain "親父" because it's similar to "父亲".
"explanation": string, // explain the sentence use language ${targetLang}, may include grammar analysis if the sentence is complex
}[][], // 1-dimensional means paragraph, 2-dimensional means sentence
}
\`\`\`
syntacticCategoryAbbr list:
${JSON.stringify(syntacticCategoryAbbr.options)}
# Examples
<example>
Variables:
- sourceLang: English
- targetLang: Simplified Mandarin Chinese
- langLevel: intermediate
Input:
{
"overallSummary": "The piece contends that although a large majority of Britons—and many other democracies—support assisted dying, Members of Parliament in Westminster appear ready to reject a November 29th bill that would legalise it in England and Wales, thereby forfeiting an opportunity to broaden personal liberties.",
"paragraphs": [
"T his newspaper believes in the liberal principle that people should have the right to choose the manner of their own death. So do two-thirds of Britons, who for decades have been in favour of assisted dying for those enduring unbearable suffering. And so do the citizens of many other democracies—18 jurisdictions have passed laws in the past decade.",
"A cookie (also known as a web cookie or browser cookie) is a small piece of data a server sends to a user's web browser. The browser may store cookies, create new cookies, modify existing ones, and send them back to the same server with later requests.",
"Despite this, Westminster MPs look as if they could vote down a bill on November 29th that would introduce assisted dying into England and Wales. They would be squandering a rare chance to enrich people's fundamental liberties.",
]
}
Output:
{
"paragraphs": [
[
{
"originalSentence": "This newspaper believes in the liberal principle that people should have the right to choose the manner of their own death.",
"translatedSentence": "本报相信自由主义原则,即人们应有权选择自己死亡的方式。",
"words": [
{
"word": "manner",
"syntacticCategory": "n.",
"explanation": "方式,手段。常用于表达做某事的方式,比如 'in a polite manner' 表示以有礼貌的方式。"
},
{
"word": "liberal",
"syntacticCategory": "adj.",
"explanation": "自由主义的,强调个人自由,特别是在政治和社会问题上的选择权。"
}
],
"explanation": "这句话表达了报社支持自由主义核心理念:人应有权决定自己如何离世。比如一个长期卧床的病人,可能希望用一种体面、无痛的方式结束生命。这里的 'manner' 强调的是“方式”,不是时间或原因。"
},
{
"originalSentence": "So do two-thirds of Britons, who for decades have been in favour of assisted dying for those enduring unbearable suffering.",
"translatedSentence": "三分之二的英国人也持这种观点,他们几十年来一直支持为承受难以忍受痛苦的人提供协助死亡的选择。",
"words": [
{
"word": "in favour of",
"syntacticCategory": "ph.",
"explanation": "支持,赞成;常用于表达态度,例如 'She is in favour of the proposal.'"
},
{
"word": "enduring",
"syntacticCategory": "v.",
"explanation": "忍受,持续经历痛苦或困境。例如:'He is enduring a lot of stress.'"
},
{
"word": "unbearable",
"syntacticCategory": "adj.",
"explanation": "难以忍受的,极度痛苦的。来自 bear承受的否定形式。"
}
],
"explanation": "enduring unbearable suffering 强调了他们所经历的是极度难忍的状态,是立法支持背后的同情理由。用 so + 助动词 + 主语’ 的倒装形式表达三分之二的英国人同样长期支持安乐死,助动词 do 与前一句的时态保持一致,代替前一句 have been in favour 的一般意义动作。"
},
{
"originalSentence": "And so do the citizens of many other democracies—18 jurisdictions have passed laws in the past decade.",
"translatedSentence": "其他许多民主国家的公民也同样支持——在过去十年中已有18个司法辖区通过了相关法律。",
"words": [
{
"word": "jurisdiction",
"syntacticCategory": "n.",
"explanation": "司法辖区,有独立立法或执法权的地区,比如国家、省或州。"
},
{
"word": "passed",
"syntacticCategory": "v.",
"explanation": "通过(法律),指立法机构正式批准一项法案。"
}
],
"explanation": "这句话扩展到全球背景说明其他民主国家也采取了类似立法。jurisdictions 是法律上的概念,指有能力制定法律的地区。说明协助死亡不是英国特有问题,而是普遍议题。"
}
],
[
{
"originalSentence": "Despite this, Westminster MPs look as if they could vote down a bill on November 29th that would introduce assisted dying into England and Wales.",
"translatedSentence": "尽管如此威斯敏斯特的议员们看起来可能会在11月29日否决一项旨在在英格兰和威尔士引入协助死亡的法案。",
"words": [
{
"word": "vote down",
"syntacticCategory": "ph.",
"explanation": "投票否决,指通过投票方式拒绝通过法案。"
},
{
"word": "bill",
"syntacticCategory": "n.",
"explanation": "法案,尚未成为法律的提案,需要通过议会表决。"
}
],
"explanation": "这句话指出尽管有广泛支持议会可能仍然否决这项法案。vote down 是立法程序中非常关键的动词短语,表示通过投票来阻止法案的通过。"
},
{
"originalSentence": "They would be squandering a rare chance to enrich people's fundamental liberties.",
"translatedSentence": "他们将浪费一个难得的机会,来扩展人们的基本自由。",
"words": [
{
"word": "squander",
"syntacticCategory": "v.",
"explanation": "浪费,尤其指轻率地错失时间、机会或资源。例如:'He squandered his inheritance.'"
},
{
"word": "liberties",
"syntacticCategory": "n.",
"explanation": "自由权,特别是指受到法律保护的基本个人权利。"
}
],
"explanation": "这句话批评议员们可能浪费一次拓展个人自由的机会。squander 强调这种机会的宝贵以及对其忽视的严重性。fundamental liberties 是自由社会最核心的价值观,如言论自由、宗教自由等,这里是指生命终结方式的选择权。"
}
]
]
}
</example>
<example>
Variables:
- sourceLang: English
- targetLang: Japanese
- langLevel: advanced
Input:
{
"overallSummary": "The piece contends that although a large majority of Britons—and many other democracies—support assisted dying, Members of Parliament in Westminster appear ready to reject a November 29th bill that would legalise it in England and Wales, thereby forfeiting an opportunity to broaden personal liberties.",
"paragraphs": [
"T his newspaper believes in the liberal principle that people should have the right to choose the manner of their own death. So do two-thirds of Britons, who for decades have been in favour of assisted dying for those enduring unbearable suffering. And so do the citizens of many other democracies—18 jurisdictions have passed laws in the past decade.",
"A cookie (also known as a web cookie or browser cookie) is a small piece of data a server sends to a user's web browser. The browser may store cookies, create new cookies, modify existing ones, and send them back to the same server with later requests.",
"Despite this, Westminster MPs look as if they could vote down a bill on November 29th that would introduce assisted dying into England and Wales. They would be squandering a rare chance to enrich people's fundamental liberties.",
]
}
Output:
{
"paragraphs": [
[
{
"originalSentence": "This newspaper believes in the liberal principle that people should have the right to choose the manner of their own death.",
"translatedSentence": "本紙は、人々が自らの死に方を選ぶ権利を持つべきだという自由主義の原則を支持している。",
"words": [
{
"word": "manner",
"syntacticCategory": "n.",
"explanation": "方法・様式。この文脈では単なる「やり方」ではなく、人が自分の人生をどのように終えるかという具体的かつ哲学的な選択を指す。尊厳・自己決定・意図的な制御を含意している。"
}
],
"explanation": "この文は新聞社の立場を示しており、人は自らの命の終わり方をコントロールする権利があるべきだと主張している。「manner」は単なる手段ではなく、死に際しての尊厳ある選択、例えば苦痛の中で死ぬのか、穏やかに終えるのかといった生き方・死に方の哲学を含んでいる。"
},
{
"originalSentence": "So do two-thirds of Britons, who for decades have been in favour of assisted dying for those enduring unbearable suffering.",
"translatedSentence": "同じ考えを持つ英国人は3分の2にものぼり、数十年にわたって、耐え難い苦しみを抱える人々に対する尊厳死の選択肢を支持してきた。",
"words": [],
"explanation": "この文は、社会の大多数がこの問題に対して倫理的な共感を抱いていることを示している。「enduring unbearable suffering」という表現は、医学的にも人道的にも深刻な状況を意味し、それに対応するための法的手段への支持が根強いことを反映している。"
},
{
"originalSentence": "And so do the citizens of many other democracies—18 jurisdictions have passed laws in the past decade.",
"translatedSentence": "他の多くの民主国家の市民も同様の立場を取っており、過去10年間で18の法域が関連法を可決している。",
"words": [
{
"word": "jurisdictions",
"syntacticCategory": "n.",
"explanation": "法域・司法管轄区域。国家、州、または地方自治体など、独自の法制度や立法権を有する地域を指し、多様な法文化と自治の存在を前提とする。"
}
],
"explanation": "この文は、協力的な死尊厳死の合法化が国際的な潮流になっていることを示している。「jurisdictions」は単に地理的な範囲ではなく、法制度の独立性と多様性を示しており、価値観の広がりも表している。"
}
],
[
{
"originalSentence": "Despite this, Westminster MPs look as if they could vote down a bill on November 29th that would introduce assisted dying into England and Wales.",
"translatedSentence": "それにもかかわらず、ウェストミンスターの議員たちは、11月29日にイングランドとウェールズに尊厳死を導入する法案を否決する可能性があるように見える。",
"words": [
{
"word": "vote down",
"syntacticCategory": "ph.",
"explanation": "投票で否決する。特に議会制民主主義において、社会的に意義のある法案が制度的または政治的な理由で潰される場合に使われる表現。"
}
],
"explanation": "この文は、社会的支持があるにもかかわらず、政治的・宗教的あるいは戦略的な理由で議会がその声を無視する可能性を指摘している。「vote down」は、制度的な抵抗や保守性を強調する語としても機能している。"
},
{
"originalSentence": "They would be squandering a rare chance to enrich people's fundamental liberties.",
"translatedSentence": "彼らは、人々の基本的自由を拡張するまたとない機会を無駄にすることになるだろう。",
"words": [
{
"word": "squandering",
"syntacticCategory": "v.",
"explanation": "浪費する。特に歴史的・道徳的に重要なチャンスを軽視・無視することによって、後々まで悔いが残るような喪失を意味する。"
},
{
"word": "fundamental liberties",
"syntacticCategory": "ph.",
"explanation": "基本的自由。民主社会を構成する核心的な権利であり、言論、信教、自律の自由を含む。この文脈では、自らの死を選ぶ自由もこれに含まれるとされる。"
}
],
"explanation": "この文には強い批判と歴史的警鐘のニュアンスが込められており、議会がこの法案を否決することは、社会的進歩の重要な機会を逃すことだと訴えている。「fundamental liberties」は、個人の尊厳を根拠とした権利概念の延長線上に、死に方の選択自由を位置づけている。"
}
]
]
}
Note:
- This example is for advanced level, so less words are selected.
</example>
Please return the response as JSON format directly.
`
}
/**
 * Additional few-shot example (Japanese source, Simplified Mandarin Chinese
 * target, beginner level) in the same input/output format used by the
 * examples in `getExplainPrompt`.
 *
 * It is not interpolated into the `getExplainPrompt` template in the visible
 * code and is not exported here.
 *
 * NOTE(review): the identifier looks like a misspelling of `_japaneseExample`;
 * renaming is left for a separate change — confirm no other references exist.
 */
const _japanseExample = `
<example>
Variables:
- sourceLang: Japanese
- targetLang: Simplified Mandarin Chinese
- langLevel: beginner
Input:
{
"overallSummary": "死にそうな親父は「葬式をするな」と言ったが、本心ではなかった。へそまがりの息子が逆に立派な葬式をすると期待していた。でも息子は「一度くらい言う通りにする」と答えた。",
"paragraphs": [
"親父(おやじ)が病気になり,もう死ぬという時に,息子を呼んで,"
]
}
Output:
{
"paragraphs": [
[
{
"originalSentence": "親父(おやじ)が病気になり,もう死ぬという時に,息子を呼んで,",
"translatedSentence": "父亲生病了,到了快要死的时候,就叫来了儿子。",
"words": [
{
"word": "が",
"syntacticCategory": "part.",
"explanation": "主语助词,用于标记句子中的主语(这里是“亲父”)。"
},
{
"word": "病気(びょうき)",
"syntacticCategory": "n.",
"explanation": "生病、疾病。"
},
{
"word": "になる",
"syntacticCategory": "v.",
"explanation": "变成……,常见表达形式,'病気になる' 就是“生病了”。"
},
{
"word": "もう",
"syntacticCategory": "adv.",
"explanation": "已经、快要,表示某事即将发生。"
},
{
"word": "という時(とき)",
"syntacticCategory": "ph.",
"explanation": "……的时候,常用于描述时间背景。"
},
{
"word": "息子(むすこ)",
"syntacticCategory": "n.",
"explanation": "儿子。"
},
{
"word": "を",
"syntacticCategory": "part.",
"explanation": "宾语助词,标记“动作的对象”,这里是“呼んで”的对象。"
},
{
"word": "呼んで(よんで)",
"syntacticCategory": "v.",
"explanation": "叫、召唤,是动词“呼ぶ”的て形。"
}
],
"explanation": "这句话的结构是:“谁 + 生病了 + 到了快要死的时候 + 做了什么”。这里父亲生病了,快要去世,就把儿子叫过来。注意“~になる”表示状态的变化,“もう”表示即将发生。"
}
]
]
}
`