4 Commit-ok 3eac494bfb ... 536cd9c300

Szerző SHA1 Üzenet Dátum
  Gogs 536cd9c300 优化内存 1 hete
  Gogs fc26d48d8f 修改CUDA为cpu版本 1 hete
  Gogs 045cc2bd36 new update 3 hónapja
  Gogs a516d16d39 new branch 3 hónapja
6 módosított fájl, 267 hozzáadás és 127 törlés
  1. 2 3
      README.md
  2. 202 31
      spacyback/mainspacy.py
  3. 2 0
      spacyback/requirements.txt
  4. 13 3
      spacyback/start.sh
  5. 1 0
      spacyback/start_mem.sh
  6. 47 90
      spacyback/style_config.py

+ 2 - 3
README.md

@@ -1,11 +1,10 @@
-# Grammar Highlighter
+# Grammar Highlighter 新功能
 
 
 利用 spaCy 与 benepar 的句法信息,为英文文本提供可视化的语法高亮,帮助学习者或编辑快速看懂句子结构。项目暴露 FastAPI 接口,可单独调用 `/analyze`,也内置一个网页代理 `/proxy` 用来抓取文章后直接高亮,并带有可选的流式 TTS 朗读控件。
 利用 spaCy 与 benepar 的句法信息,为英文文本提供可视化的语法高亮,帮助学习者或编辑快速看懂句子结构。项目暴露 FastAPI 接口,可单独调用 `/analyze`,也内置一个网页代理 `/proxy` 用来抓取文章后直接高亮,并带有可选的流式 TTS 朗读控件。
 
 
 ## 功能特性
 ## 功能特性
 - **句法角色高亮**:识别主语、谓语、宾语、补语、同位语、状语、固定搭配等多种结构,同时对括号说明、绝对结构、固定短语等给予特殊样式。
 - **句法角色高亮**:识别主语、谓语、宾语、补语、同位语、状语、固定搭配等多种结构,同时对括号说明、绝对结构、固定短语等给予特殊样式。
-- **benepar Constituency 支持**:在成功加载 benepar 时会标注状语从句、名词性从句、非限定结构、关系从句等;若模型缺失自动回退到依存句法并给出警告。
-- **句子分析摘要**:每个句子带中文提示(可在 `style_config.py` 中通过 `SENTENCE_HELPER_ENABLED` 开关控制),说明句型、主谓宾、从句功能、连接词等。
+
 - **网页代理模式**:`/proxy` 路由用 httpx 拉取远程页面,提取正文、列表、代码块与图片占位符,随后复用同一高亮流程,可按需重新注入图片或代码片段。
 - **网页代理模式**:`/proxy` 路由用 httpx 拉取远程页面,提取正文、列表、代码块与图片占位符,随后复用同一高亮流程,可按需重新注入图片或代码片段。
 - **TTS 朗读**:UI 中的“朗读高亮文本/朗读选中文本”按钮会将文本发送到 `TTS_ENDPOINT`(默认 `http://141.140.15.30:8028/generate`),逐段播放流式音频。
 - **TTS 朗读**:UI 中的“朗读高亮文本/朗读选中文本”按钮会将文本发送到 `TTS_ENDPOINT`(默认 `http://141.140.15.30:8028/generate`),逐段播放流式音频。
 - **健壮的降级策略**:自动补全 spaCy 句子切分、禁用 benepar 时继续运行;`/health` 接口会暴露 benepar 状态与任何加载警告。
 - **健壮的降级策略**:自动补全 spaCy 句子切分、禁用 benepar 时继续运行;`/health` 接口会暴露 benepar 状态与任何加载警告。

+ 202 - 31
spacyback/mainspacy.py

@@ -4,16 +4,16 @@
 import asyncio
 import asyncio
 import html
 import html
 import json
 import json
+import os
 import re
 import re
 from collections import Counter
 from collections import Counter
 from dataclasses import dataclass, field
 from dataclasses import dataclass, field
 from html.parser import HTMLParser
 from html.parser import HTMLParser
 from string import Template
 from string import Template
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple
 from urllib import error as urllib_error, request as urllib_request
 from urllib import error as urllib_error, request as urllib_request
 from urllib.parse import urlparse, urlunparse
 from urllib.parse import urlparse, urlunparse
 
 
-import benepar
 import httpx
 import httpx
 import spacy
 import spacy
 from fastapi import FastAPI, HTTPException
 from fastapi import FastAPI, HTTPException
@@ -47,7 +47,7 @@ def _load_spacy_pipeline(
     except OSError:
     except OSError:
         try:
         try:
             spacy_download(model_name)
             spacy_download(model_name)
-            nlp = spacy.load(model_name)
+            nlp = spacy.load(model_name, disable=["tagger", "lemmatizer"])
         except Exception as exc:  # pragma: no cover - install helper
         except Exception as exc:  # pragma: no cover - install helper
             raise RuntimeError(
             raise RuntimeError(
                 f"spaCy model '{model_name}' is required. Install via `python -m spacy download {model_name}`."
                 f"spaCy model '{model_name}' is required. Install via `python -m spacy download {model_name}`."
@@ -57,17 +57,34 @@ def _load_spacy_pipeline(
     pipe_names = set(nlp.pipe_names)
     pipe_names = set(nlp.pipe_names)
     if not ({"parser", "senter", "sentencizer"} & pipe_names):
     if not ({"parser", "senter", "sentencizer"} & pipe_names):
         try:
         try:
-            nlp.add_pipe("sentencizer")
+            nlp.add_pipe("sentencizer")  # Language.add_pipe has no 'disable' kwarg
         except Exception:
         except Exception:
             pass  # if already present or unavailable, ignore
             pass  # if already present or unavailable, ignore
 
 
+    enable_benepar = os.getenv("ENABLE_BENEPAR", "0").strip().lower() in {
+        "1",
+        "true",
+        "yes",
+        "on",
+    }
+
+    if not enable_benepar:
+        BENE_PAR_WARNING = (
+            "Benepar is disabled by ENABLE_BENEPAR. Using dependency-based spans."
+        )
+        return nlp
+
     # Try to add benepar
     # Try to add benepar
     if "benepar" not in nlp.pipe_names:
     if "benepar" not in nlp.pipe_names:
         try:
         try:
+            import benepar
+
             nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
             nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
             HAS_BENEPAR = True
             HAS_BENEPAR = True
         except ValueError:
         except ValueError:
             try:
             try:
+                import benepar
+
                 benepar.download(benepar_model)
                 benepar.download(benepar_model)
                 nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
                 nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
                 HAS_BENEPAR = True
                 HAS_BENEPAR = True
@@ -179,6 +196,16 @@ SUBORDINATORS_TO_FUNCTION = {
 }
 }
 FINITE_VERB_TAGS = {"VBD", "VBP", "VBZ"}
 FINITE_VERB_TAGS = {"VBD", "VBP", "VBZ"}
 NONFINITE_VERB_TAGS = {"VBG", "VBN"}
 NONFINITE_VERB_TAGS = {"VBG", "VBN"}
+CLAUSE_PREDICATE_DEPS = {
+    "advcl",
+    "ccomp",
+    "xcomp",
+    "acl",
+    "relcl",
+    "csubj",
+    "csubjpass",
+    "parataxis",
+}
 FIXED_MULTIWORD_PHRASES: Tuple[Tuple[re.Pattern, str], ...] = tuple(
 FIXED_MULTIWORD_PHRASES: Tuple[Tuple[re.Pattern, str], ...] = tuple(
     (
     (
         re.compile(pattern, re.IGNORECASE),
         re.compile(pattern, re.IGNORECASE),
@@ -370,6 +397,35 @@ def add_span(spans: List[Span], start_token: int, end_token: int, cls: str, attr
     spans.append(Span(start_token=start_token, end_token=end_token, cls=cls, attrs=attrs))
     spans.append(Span(start_token=start_token, end_token=end_token, cls=cls, attrs=attrs))
 
 
 
 
+def _prune_adverbial_spans(spans: List[Span], sentence_token_bounds: Tuple[int, int]) -> None:
+    """Drop redundant or oversized adverbial spans that would underline an entire sentence."""
+    sent_start, sent_end = sentence_token_bounds
+    if sent_start < 0 or sent_end <= sent_start:
+        return
+    sent_length = sent_end - sent_start
+    filtered: List[Span] = []
+    seen_ranges: Set[Tuple[int, int]] = set()
+    for span in spans:
+        classes = span.cls.split()
+        if "role-adverbial" not in classes:
+            filtered.append(span)
+            continue
+        span_length = span.end_token - span.start_token
+        # Skip single-token adverbs and spans that swallow the whole sentence.
+        if span_length <= 1:
+            continue
+        coverage_start = max(span.start_token, sent_start)
+        coverage_end = min(span.end_token, sent_end)
+        if coverage_end - coverage_start >= sent_length:
+            continue
+        range_key = (coverage_start, coverage_end)
+        if range_key in seen_ranges:
+            continue
+        seen_ranges.add(range_key)
+        filtered.append(span)
+    spans[:] = filtered
+
+
 def subtree_char_span(token: SpacyToken) -> Tuple[int, int]:
 def subtree_char_span(token: SpacyToken) -> Tuple[int, int]:
     subtree = list(token.subtree)
     subtree = list(token.subtree)
     if not subtree:
     if not subtree:
@@ -562,7 +618,7 @@ def _is_finite_predicate_head(token: SpacyToken) -> bool:
         return True
         return True
     verb_forms = set(token.morph.get("VerbForm"))
     verb_forms = set(token.morph.get("VerbForm"))
     if "Inf" in verb_forms:
     if "Inf" in verb_forms:
-        return False
+        return _has_finite_auxiliary(token)
     if verb_forms & {"Part", "Ger"}:
     if verb_forms & {"Part", "Ger"}:
         return _has_finite_auxiliary(token)
         return _has_finite_auxiliary(token)
     if token.tag_ in NONFINITE_VERB_TAGS:
     if token.tag_ in NONFINITE_VERB_TAGS:
@@ -602,6 +658,24 @@ def _predicate_heads(sentence: SpacySpan) -> List[SpacyToken]:
     return ordered
     return ordered
 
 
 
 
+def _is_clause_predicate(token: SpacyToken) -> bool:
+    """Return True if the predicate head originates inside a subordinate clause."""
+    if token.dep_ in CLAUSE_PREDICATE_DEPS:
+        return True
+    if token.dep_ != "conj":
+        return False
+    ancestor = token.head
+    safety = 0
+    while ancestor is not None and safety < 10:
+        if ancestor.dep_ in CLAUSE_PREDICATE_DEPS:
+            return True
+        if ancestor.dep_ != "conj" or ancestor.head is ancestor:
+            break
+        ancestor = ancestor.head
+        safety += 1
+    return False
+
+
 def _add_fixed_phrases(
 def _add_fixed_phrases(
     sentence: SpacySpan,
     sentence: SpacySpan,
     mapping: Dict[int, int],
     mapping: Dict[int, int],
@@ -652,7 +726,10 @@ def annotate_sentence(
 
 
     for head in _predicate_heads(sentence):
     for head in _predicate_heads(sentence):
         start_char, end_char = _predicate_span_bounds(head)
         start_char, end_char = _predicate_span_bounds(head)
-        add_char_based_span(spans, start_char, end_char, "role-predicate", mapping)
+        cls = "role-predicate"
+        if _is_clause_predicate(head):
+            cls = "role-predicate role-predicate-clause"
+        add_char_based_span(spans, start_char, end_char, cls, mapping)
         predicate_text = sentence.doc.text[start_char:end_char].strip()
         predicate_text = sentence.doc.text[start_char:end_char].strip()
         if summary is not None:
         if summary is not None:
             summary.predicates.append(predicate_text or head.text)
             summary.predicates.append(predicate_text or head.text)
@@ -693,12 +770,14 @@ def annotate_sentence(
         if tok.dep_ in {"amod", "poss", "compound", "nummod"}:
         if tok.dep_ in {"amod", "poss", "compound", "nummod"}:
             add_token(tok, "role-modifier")
             add_token(tok, "role-modifier")
 
 
-    adverbial_ranges = set()
-    for tok in sentence:
-        if tok.dep_ in ADVERBIAL_DEPS:
-            adverbial_ranges.add(subtree_char_span(tok))
-    for start_char, end_char in adverbial_ranges:
-        add_char_based_span(spans, start_char, end_char, "role-adverbial", mapping)
+    # Dependency-based adverbial spans are a fallback when constituency data is unavailable.
+    if not HAS_BENEPAR or BENE_PAR_WARNING:
+        adverbial_ranges = set()
+        for tok in sentence:
+            if tok.dep_ in ADVERBIAL_DEPS:
+                adverbial_ranges.add(subtree_char_span(tok))
+        for start_char, end_char in adverbial_ranges:
+            add_char_based_span(spans, start_char, end_char, "role-adverbial", mapping)
 
 
     for tok in sentence:
     for tok in sentence:
         if tok.dep_ == "appos":
         if tok.dep_ == "appos":
@@ -738,6 +817,7 @@ def annotate_sentence(
         summary,
         summary,
     )
     )
     _add_fixed_phrases(sentence, mapping, spans, summary)
     _add_fixed_phrases(sentence, mapping, spans, summary)
+    _prune_adverbial_spans(spans, sent_bounds)
 
 
     return spans, summary
     return spans, summary
 
 
@@ -906,6 +986,7 @@ def highlight_text_with_spacy(
     text: str,
     text: str,
     paragraph_meta: Optional[List[Dict[str, str]]] = None,
     paragraph_meta: Optional[List[Dict[str, str]]] = None,
     include_helper: bool = False,
     include_helper: bool = False,
+    paragraph_ranges: Optional[List[Tuple[int, int]]] = None,
 ) -> str:
 ) -> str:
     if NLP is None:
     if NLP is None:
         raise RuntimeError(f"spaCy pipeline unavailable: {NLP_LOAD_ERROR}")
         raise RuntimeError(f"spaCy pipeline unavailable: {NLP_LOAD_ERROR}")
@@ -923,12 +1004,22 @@ def highlight_text_with_spacy(
         )
         )
         doc = _run_pipeline_without_benepar(text)
         doc = _run_pipeline_without_benepar(text)
 
 
-    paragraph_ranges = _split_paragraph_ranges(text)
-    paragraph_counters = [0 for _ in paragraph_ranges]
+    ranges = None
+    if paragraph_ranges:
+        valid = True
+        for start, end in paragraph_ranges:
+            if start < 0 or end < start or end > len(text):
+                valid = False
+                break
+        if valid:
+            ranges = list(paragraph_ranges)
+    if not ranges:
+        ranges = _split_paragraph_ranges(text)
+    paragraph_counters = [0 for _ in ranges]
     paragraph_idx = 0
     paragraph_idx = 0
     paragraph_spans: List[Span] = []
     paragraph_spans: List[Span] = []
-    paragraph_attrs = paragraph_meta if paragraph_meta and len(paragraph_meta) == len(paragraph_ranges) else None
-    for idx, (start, end) in enumerate(paragraph_ranges):
+    paragraph_attrs = paragraph_meta if paragraph_meta and len(paragraph_meta) == len(ranges) else None
+    for idx, (start, end) in enumerate(ranges):
         attrs = None
         attrs = None
         if paragraph_attrs:
         if paragraph_attrs:
             attrs = paragraph_attrs[idx] or None
             attrs = paragraph_attrs[idx] or None
@@ -937,9 +1028,9 @@ def highlight_text_with_spacy(
     spans: List[Span] = list(paragraph_spans)
     spans: List[Span] = list(paragraph_spans)
 
 
     for sent in doc.sents:
     for sent in doc.sents:
-        while paragraph_idx < len(paragraph_ranges) and paragraph_ranges[paragraph_idx][1] <= sent.start_char:
+        while paragraph_idx < len(ranges) and ranges[paragraph_idx][1] <= sent.start_char:
             paragraph_idx += 1
             paragraph_idx += 1
-        current_idx = min(paragraph_idx, len(paragraph_ranges) - 1)
+        current_idx = min(paragraph_idx, len(ranges) - 1)
         paragraph_counters[current_idx] += 1
         paragraph_counters[current_idx] += 1
         sentence_label = _circled_number(paragraph_counters[current_idx])
         sentence_label = _circled_number(paragraph_counters[current_idx])
 
 
@@ -1032,8 +1123,20 @@ async def proxy(url: Optional[str] = None, show_images: bool = False):
     if not url:
     if not url:
         return HTMLResponse(_render_proxy_page(show_images=show_images))
         return HTMLResponse(_render_proxy_page(show_images=show_images))
     try:
     try:
-        normalized_url, title, page_text, images, code_blocks, paragraph_meta = await _fetch_remote_plaintext(url)
-        highlighted_fragment = highlight_text_with_spacy(page_text, paragraph_meta=paragraph_meta or None)
+        (
+            normalized_url,
+            title,
+            page_text,
+            images,
+            code_blocks,
+            paragraph_meta,
+            paragraph_ranges,
+        ) = await _fetch_remote_plaintext(url)
+        highlighted_fragment = highlight_text_with_spacy(
+            page_text,
+            paragraph_meta=paragraph_meta or None,
+            paragraph_ranges=paragraph_ranges or None,
+        )
         if code_blocks:
         if code_blocks:
             highlighted_fragment = _inject_proxy_codeblocks(highlighted_fragment, code_blocks)
             highlighted_fragment = _inject_proxy_codeblocks(highlighted_fragment, code_blocks)
         image_notice = None
         image_notice = None
@@ -1392,9 +1495,30 @@ function handlePauseResumeToggle() {
   }
   }
 }
 }
 
 
+function normalizeTtsLine(rawLine) {
+  if (typeof rawLine !== 'string') {
+    return '';
+  }
+  let trimmed = rawLine.replace(/\\r/g, '').trim();
+  if (!trimmed) {
+    return '';
+  }
+  if (trimmed.startsWith('data:')) {
+    trimmed = trimmed.slice(5).trim();
+  }
+  if (!trimmed || trimmed === '[DONE]') {
+    return '';
+  }
+  return trimmed;
+}
+
 function parseTtsLine(line) {
 function parseTtsLine(line) {
+  const normalized = normalizeTtsLine(line);
+  if (!normalized) {
+    return false;
+  }
   try {
   try {
-    const parsed = JSON.parse(line);
+    const parsed = JSON.parse(normalized);
     if (parsed && parsed.audio) {
     if (parsed && parsed.audio) {
       enqueueAudioChunk(parsed.audio);
       enqueueAudioChunk(parsed.audio);
       return true;
       return true;
@@ -1408,9 +1532,7 @@ function parseTtsLine(line) {
 async function consumeTtsResponse(response) {
 async function consumeTtsResponse(response) {
   let chunkCount = 0;
   let chunkCount = 0;
   const handleLine = rawLine => {
   const handleLine = rawLine => {
-    const trimmed = rawLine.replace(/\\r/g, '').trim();
-    if (!trimmed) return;
-    if (parseTtsLine(trimmed)) {
+    if (parseTtsLine(rawLine)) {
       chunkCount += 1;
       chunkCount += 1;
     }
     }
   };
   };
@@ -1768,9 +1890,30 @@ $source_text_script
     }
     }
   }
   }
 
 
+  function normalizeProxyTtsLine(rawLine) {
+    if (typeof rawLine !== 'string') {
+      return '';
+    }
+    var trimmed = rawLine.replace(/\\r/g, '').trim();
+    if (!trimmed) {
+      return '';
+    }
+    if (trimmed.indexOf('data:') === 0) {
+      trimmed = trimmed.slice(5).trim();
+    }
+    if (!trimmed || trimmed === '[DONE]') {
+      return '';
+    }
+    return trimmed;
+  }
+
   function parseTtsLine(line) {
   function parseTtsLine(line) {
+    var normalized = normalizeProxyTtsLine(line);
+    if (!normalized) {
+      return false;
+    }
     try {
     try {
-      var parsed = JSON.parse(line);
+      var parsed = JSON.parse(normalized);
       if (parsed && parsed.audio) {
       if (parsed && parsed.audio) {
         enqueueAudioChunk(parsed.audio);
         enqueueAudioChunk(parsed.audio);
         return true;
         return true;
@@ -1784,9 +1927,7 @@ $source_text_script
   async function consumeTtsResponse(response) {
   async function consumeTtsResponse(response) {
     var chunkCount = 0;
     var chunkCount = 0;
     var handleLine = function(rawLine) {
     var handleLine = function(rawLine) {
-      var trimmed = rawLine.replace(/\\r/g, '').trim();
-      if (!trimmed) return;
-      if (parseTtsLine(trimmed)) {
+      if (parseTtsLine(rawLine)) {
         chunkCount += 1;
         chunkCount += 1;
       }
       }
     };
     };
@@ -2476,7 +2617,9 @@ class SimpleHTMLStripper(HTMLParser):
         blocks = self._selected_blocks()
         blocks = self._selected_blocks()
         if not blocks:
         if not blocks:
             return ""
             return ""
-        return "\n\n".join(block["text"] for block in blocks)
+        # Keep natural paragraphs contiguous with a single newline instead of
+        # injecting blank lines that did not exist in the source.
+        return "\n".join(block["text"] for block in blocks)
 
 
     def _selected_blocks(self) -> List[Dict[str, Any]]:
     def _selected_blocks(self) -> List[Dict[str, Any]]:
         if not self._blocks:
         if not self._blocks:
@@ -2571,6 +2714,24 @@ def _build_paragraph_metadata(blocks: List[Dict[str, Any]]) -> List[Dict[str, st
     return paragraph_meta
     return paragraph_meta
 
 
 
 
+def _build_paragraph_ranges(blocks: List[Dict[str, Any]]) -> List[Tuple[int, int]]:
+    """Map each stripped block to its char span within the joined plain text."""
+    if not blocks:
+        return []
+    ranges: List[Tuple[int, int]] = []
+    cursor = 0
+    for idx, block in enumerate(blocks):
+        text = block.get("text") or ""
+        start = cursor
+        end = start + len(text)
+        ranges.append((start, end))
+        cursor = end
+        # Blocks are joined with a single newline; advance past that separator.
+        if idx < len(blocks) - 1:
+            cursor += 1
+    return ranges
+
+
 def _decode_html_bytes(raw_content: bytes, encoding_hint: Optional[str]) -> str:
 def _decode_html_bytes(raw_content: bytes, encoding_hint: Optional[str]) -> str:
     encoding_candidates: List[str] = []
     encoding_candidates: List[str] = []
     if encoding_hint:
     if encoding_hint:
@@ -2643,7 +2804,15 @@ async def _download_html_with_fallback(url: str) -> str:
 
 
 async def _fetch_remote_plaintext(
 async def _fetch_remote_plaintext(
     url: str,
     url: str,
-) -> Tuple[str, str, str, List[Dict[str, str]], List[Dict[str, str]], List[Dict[str, str]]]:
+) -> Tuple[
+    str,
+    str,
+    str,
+    List[Dict[str, str]],
+    List[Dict[str, str]],
+    List[Dict[str, str]],
+    List[Tuple[int, int]],
+]:
     normalized = _normalize_target_url(url)
     normalized = _normalize_target_url(url)
     html_body = await _download_html_with_fallback(normalized)
     html_body = await _download_html_with_fallback(normalized)
     stripper = SimpleHTMLStripper()
     stripper = SimpleHTMLStripper()
@@ -2653,6 +2822,7 @@ async def _fetch_remote_plaintext(
     code_blocks = stripper.get_code_blocks()
     code_blocks = stripper.get_code_blocks()
     plain_text = stripper.get_text()
     plain_text = stripper.get_text()
     block_info = stripper.get_blocks()
     block_info = stripper.get_blocks()
+    paragraph_ranges = _build_paragraph_ranges(block_info)
     if not plain_text:
     if not plain_text:
         plain_text = _fallback_html_to_text(html_body)
         plain_text = _fallback_html_to_text(html_body)
         if not plain_text:
         if not plain_text:
@@ -2662,8 +2832,9 @@ async def _fetch_remote_plaintext(
         images = []
         images = []
         code_blocks = []
         code_blocks = []
         block_info = []
         block_info = []
+        paragraph_ranges = []
     paragraph_meta = _build_paragraph_metadata(block_info)
     paragraph_meta = _build_paragraph_metadata(block_info)
-    return normalized, title, plain_text, images, code_blocks, paragraph_meta
+    return normalized, title, plain_text, images, code_blocks, paragraph_meta, paragraph_ranges
 
 
 
 
 def _render_proxy_page(
 def _render_proxy_page(

+ 2 - 0
spacyback/requirements.txt

@@ -7,3 +7,5 @@ benepar>=0.2.0
 
 
 # ASGI server used by start.sh
 # ASGI server used by start.sh
 uvicorn[standard]>=0.29.0
 uvicorn[standard]>=0.29.0
+# CPU-only PyTorch wheels — run manually: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+

+ 13 - 3
spacyback/start.sh

@@ -1,4 +1,14 @@
 #!/bin/bash
 #!/bin/bash
-# Disable uvicorn access logs to avoid noise from automated probes
-# (e.g. /wp-includes/wlwmanifest.xml, /xmlrpc.php) hitting the service.
-nohup uvicorn mainspacy:app --host 0.0.0.0 --port 12012 --no-access-log &
+set -euo pipefail
+
+cd "$(dirname "$0")"
+
+# Ensure only one mainspacy instance keeps port 12012 (reload/no-reload).
+pkill -f "uvicorn mainspacy:app .*--port 12012" 2>/dev/null || true
+sleep 1
+
+# Default to low-memory mode. Set ENABLE_BENEPAR=1 to re-enable constituency parser.
+ENABLE_BENEPAR="${ENABLE_BENEPAR:-0}" \
+setsid /root/miniconda3/envs/py311/bin/uvicorn mainspacy:app --host 0.0.0.0 --port 12012 --no-access-log > nohup.out 2>&1 < /dev/null &
+
+echo "started mainspacy pid=$!"

+ 1 - 0
spacyback/start_mem.sh

@@ -0,0 +1 @@
+ENABLE_BENEPAR=1 bash /home/myproc/chrome_grammarly/spacyback/start.sh

+ 47 - 90
spacyback/style_config.py

@@ -41,10 +41,7 @@ STYLE_RULES: List[StyleRule] = [
         selector=".sentence-scope",
         selector=".sentence-scope",
         target="句子外层容器",
         target="句子外层容器",
         description="包裹整句,方便显示序号与复杂度指示。",
         description="包裹整句,方便显示序号与复杂度指示。",
-        css=(
-            "position:relative;display:inline;padding:0;margin:0;"
-            "box-decoration-break:clone"
-        ),
+        css=("position:relative;display:inline;padding:0;margin:0;box-decoration-break:clone"),
     ),
     ),
     StyleRule(
     StyleRule(
         selector=".sentence-scope::before",
         selector=".sentence-scope::before",
@@ -114,24 +111,7 @@ STYLE_RULES: List[StyleRule] = [
         description="使用原编号恢复有序列表样式。",
         description="使用原编号恢复有序列表样式。",
         css="content:attr(data-list-index) '. ';",
         css="content:attr(data-list-index) '. ';",
     ),
     ),
-    # StyleRule(
-    #     selector=".sentence-scope[data-complex='1']",
-    #     target="复杂句提示",
-    #     description="复杂句底部加淡橙色阴影,以提示结构较复杂。",
-    #     css="box-shadow:inset 0 -0.2rem 0 rgba(250,209,155,.6)",
-    # ),
-    # StyleRule(
-    #     selector=".analysis[data-helper='on'] .sentence-scope::after",
-    #     target="句子辅助说明",
-    #     description="在句后输出中文提示,解释成分与从句情况。",
-    #     css="content:attr(data-note);display:block;font-size:.85rem;color:#64748b;margin:.2rem 0 .45rem 1.5rem;line-height:1.4",
-    # ),
-    # StyleRule(
-    #     selector=".analysis[data-helper='off'] .sentence-scope::after",
-    #     target="关闭辅助说明",
-    #     description="当 helper 关闭时隐藏说明,避免额外占位。",
-    #     css="content:'';display:none",
-    # ),
+
     StyleRule(
     StyleRule(
         selector=".role-subject",
         selector=".role-subject",
         target="主语",
         target="主语",
@@ -142,58 +122,64 @@ STYLE_RULES: List[StyleRule] = [
         selector=".role-predicate",
         selector=".role-predicate",
         target="谓语动词",
         target="谓语动词",
         description="字体加粗,强调谓语中心。",
         description="字体加粗,强调谓语中心。",
-        css="color:#000000!important;font-weight:700;",
+        css="color:#000000;font-weight:700;",
+    ),
+    StyleRule(
+        selector=".role-predicate-clause",
+        target="从句谓语动词",
+        description="使用深蓝色加粗以凸显从句谓语。",
+        css="color:#0d1c5e;font-weight:700;font-style:italic;",
     ),
     ),
     StyleRule(
     StyleRule(
         selector=".role-object-do",
         selector=".role-object-do",
         target="直接宾语",
         target="直接宾语",
-        description="浅绿底色显示直接宾语。",
+        description="深绿色字体显示直接宾语。",
         # css="background-color:#e5ffcc",
         # css="background-color:#e5ffcc",
-        css ="border-bottom:2px solid #e5ffcc; color:#2a5700"
+        css ="color:#2a5700"
     ),
     ),
     StyleRule(
     StyleRule(
         selector=".role-object-io",
         selector=".role-object-io",
         target="间接宾语",
         target="间接宾语",
-        description="黄绿底色区分间接宾语。",
+        description="青蓝色字体区分间接宾语。",
         # css="background-color:#cef0a3",
         # css="background-color:#cef0a3",
-        css ="border-bottom:2px solid #120d4a; color:#120d4a"
-    ),
-    StyleRule(
-        selector=".role-complement",
-        target="表语/主补语",
-        description="实线下划线指示补语区域。",
-        css="border-bottom:2px solid #e6a04c",
-    ),
-    StyleRule(
-        selector=".role-object-complement",
-        target="宾补",
-        description="虚线下划线提示补充说明的宾补。",
-        css="border-bottom:2px dashed #e6a04c",
-    ),
-    StyleRule(
-        selector=".role-apposition",
-        target="同位语",
-        description="蓝色立线和缩进强调同位语说明。",
-        css="border-left:2px solid #63a4d4;padding-left:.15rem",
+        css ="color:#0b6779"
     ),
     ),
+    # StyleRule(
+    #     selector=".role-complement",
+    #     target="表语/主补语",
+    #     description="实线下划线指示补语区域。",
+    #     css="border-bottom:2px dotted #af6a18",
+    # ),
+    # StyleRule(
+    #     selector=".role-object-complement",
+    #     target="宾补",
+    #     description="虚线下划线提示补充说明的宾补。",
+    #     css="border-bottom:2px dotted #92252c",
+    # ),
+    # StyleRule(
+    #     selector=".role-apposition",
+    #     target="同位语",
+    #     description="蓝色立线和缩进强调同位语说明。",
+    #     css="border-left:2px dotted #63a4d4;padding-left:.15rem",
+    # ),
     StyleRule(
     StyleRule(
         selector=".role-adverbial",
         selector=".role-adverbial",
         target="状语短语",
         target="状语短语",
-        description="黄绿底色突出状语信息。",
+        description="青色点状下划线突出状语信息。",
         # css="background-color:#f6fef8",
         # css="background-color:#f6fef8",
-        css="border-bottom:2px solid #f6fef8",
+        css="border-bottom:1px dotted #1cbaca",
     ),
     ),
     StyleRule(
     StyleRule(
         selector=".verbal-infinitive",
         selector=".verbal-infinitive",
         target="不定式结构",
         target="不定式结构",
-        description="虚线下划线提示 to+动词的不定式短语。",
-        css="border-bottom:2px dashed #c084fc;color:#581c87",
+        description="颜色提示 to+动词的不定式短语。",
+        css="color:#200d72",
     ),
     ),
     StyleRule(
     StyleRule(
         selector=".verbal-gerund",
         selector=".verbal-gerund",
         target="动名词结构",
         target="动名词结构",
         description="淡紫底纹提示 V-ing 充当名词的结构。",
         description="淡紫底纹提示 V-ing 充当名词的结构。",
-        css="border-bottom:2px dashed #c084fc;color:#581c87",
+        css="color:#3f033d",
     ),
     ),
     StyleRule(
     StyleRule(
         selector=".role-connector",
         selector=".role-connector",
@@ -207,12 +193,12 @@ STYLE_RULES: List[StyleRule] = [
     #     description="更浅的背景温和提示限定词。",
     #     description="更浅的背景温和提示限定词。",
     #     css="background-color:#f8fafc;color:#475569",
     #     css="background-color:#f8fafc;color:#475569",
     # ),
     # ),
-    StyleRule(
-        selector=".role-modifier",
-        target="形容词或并列修饰",
-        description="虚线下划线标出修饰信息,保证主体和修饰对比。",
-        css="border-bottom:1px dotted #93c5fd",
-    ),
+    # StyleRule(
+    #     selector=".role-modifier",
+    #     target="形容词或并列修饰",
+    #     description="虚线下划线标出修饰信息,保证主体和修饰对比。",
+    #     css="border-bottom:1px dotted #93c5fd",
+    # ),
     StyleRule(
     StyleRule(
         selector=".role-parenthetical",
         selector=".role-parenthetical",
         target="插入语",
         target="插入语",
@@ -258,41 +244,12 @@ STYLE_RULES: List[StyleRule] = [
     
     
     
     
     # StyleRule(
     # StyleRule(
-    #     selector=".analysis[data-helper='on'] .clause-relative[data-modifies]::before,.analysis[data-helper='on'] .clause-adverbial[data-modifies]::before",
-    #     target="从句修饰箭头",
-    #     description="在辅助开启时显示“→”指向被修饰的成分。",
-    #     css="content:'→'attr(data-modifies)' ';color:#666;font-size:.85em",
-    # ),
-    # StyleRule(
-    #     selector=".analysis[data-helper='on'] .clause-adverbial[data-function]::after",
-    #     target="状语从句功能标签",
-    #     description="在尾部追加方括号说明(时间/原因等)。",
-    #     css="content:' ['attr(data-function)']';color:#1b5e20;font-size:.85em",
-    # ),
-    # StyleRule(
-    #     selector=".analysis[data-helper='on'] .clause-noun[data-clause-role]::after",
-    #     target="名词从句句法角色",
-    #     description="括号提示该名词从句在句中的角色(主语/宾语)。",
-    #     css="content:' ('attr(data-clause-role)')';color:#3f6212;font-size:.78em",
-    # ),
-    StyleRule(
-        selector=".phrase-fixed",
-        target="固定搭配",
-        description="米色底与虚线强调固定表达或习语。",
-        css="background-color:#fff8f0;border-bottom:1px dashed #c28150",
-    ),
-    # StyleRule(
-    #     selector=".role-residual",
-    #     target="未分类成分",
-    #     description="浅灰背景提示未归类成分,并通过 data-role 提供中文标签。",
-    #     css="background-color:#f6f8fa;color:#475569;border-bottom:1px dotted #cbd5e1",
-    # ),
-    # StyleRule(
-    #     selector=".lex-rare",
-    #     target="低频词",
-    #     description="深蓝色字体提示低频或重点词汇。",
-    #     css="color:#000080",
+    #     selector=".phrase-fixed",
+    #     target="固定搭配",
+    #     description="米色底与虚线强调固定表达或习语。",
+    #     css="background-color:#fff8f0;border-bottom:1px dashed #c28150",
     # ),
     # ),
+
 ]
 ]
 
 
 STYLE_BLOCK = build_style_block(STYLE_RULES)
 STYLE_BLOCK = build_style_block(STYLE_RULES)