ソースを参照

修改CUDA为cpu版本

Gogs 1 週間 前
コミット
fc26d48d8f
4 ファイル変更、43 行追加、11 行削除
  1. 39 7
      spacyback/mainspacy.py
  2. 2 0
      spacyback/requirements.txt
  3. 1 3
      spacyback/start.sh
  4. 1 1
      spacyback/style_config.py

+ 39 - 7
spacyback/mainspacy.py

@@ -9,7 +9,7 @@ from collections import Counter
 from dataclasses import dataclass, field
 from html.parser import HTMLParser
 from string import Template
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple
 from urllib import error as urllib_error, request as urllib_request
 from urllib.parse import urlparse, urlunparse
 
@@ -380,6 +380,35 @@ def add_span(spans: List[Span], start_token: int, end_token: int, cls: str, attr
     spans.append(Span(start_token=start_token, end_token=end_token, cls=cls, attrs=attrs))
 
 
+def _prune_adverbial_spans(spans: List[Span], sentence_token_bounds: Tuple[int, int]) -> None:
+    """Drop redundant/oversized adverbial spans that make entire sentences underline.
+
+    Only spans whose ``cls`` contains the ``role-adverbial`` class are
+    candidates for removal; every other span is kept. The relative order of
+    the surviving spans is preserved.
+
+    An adverbial span is dropped when any of the following holds:
+
+    * ``end_token - start_token`` is at most 1;
+    * the part of the span that lies inside the sentence bounds is at least
+      as long as the sentence itself;
+    * an earlier adverbial span already produced the same clipped range.
+
+    Args:
+        spans: Span list to filter. It is rewritten in place via slice
+            assignment, so callers holding a reference see the pruned list.
+        sentence_token_bounds: ``(start, end)`` token indices of the sentence.
+            The list is left untouched when ``start`` is negative or ``end``
+            is not greater than ``start``.
+
+    Returns:
+        None. The result is delivered by mutating ``spans``.
+    """
+    sent_start, sent_end = sentence_token_bounds
+    # Degenerate bounds give nothing meaningful to compare against; bail out.
+    if sent_start < 0 or sent_end <= sent_start:
+        return
+    sent_length = sent_end - sent_start
+    filtered: List[Span] = []
+    # Clipped (start, end) ranges of the adverbial spans kept so far.
+    seen_ranges: Set[Tuple[int, int]] = set()
+    for span in spans:
+        # ``cls`` may carry several space-separated class names.
+        classes = span.cls.split()
+        if "role-adverbial" not in classes:
+            filtered.append(span)
+            continue
+        span_length = span.end_token - span.start_token
+        # Skip adverbial spans whose token range has length 1 or less.
+        if span_length <= 1:
+            continue
+        # Clip the span to the sentence before measuring how much it covers.
+        coverage_start = max(span.start_token, sent_start)
+        coverage_end = min(span.end_token, sent_end)
+        # Skip spans that swallow the whole sentence.
+        if coverage_end - coverage_start >= sent_length:
+            continue
+        # Duplicates are detected on the clipped range, not the raw span range.
+        range_key = (coverage_start, coverage_end)
+        if range_key in seen_ranges:
+            continue
+        seen_ranges.add(range_key)
+        filtered.append(span)
+    # Slice assignment mutates the caller's list instead of rebinding the name.
+    spans[:] = filtered
+
+
 def subtree_char_span(token: SpacyToken) -> Tuple[int, int]:
     subtree = list(token.subtree)
     if not subtree:
@@ -724,12 +753,14 @@ def annotate_sentence(
         if tok.dep_ in {"amod", "poss", "compound", "nummod"}:
             add_token(tok, "role-modifier")
 
-    adverbial_ranges = set()
-    for tok in sentence:
-        if tok.dep_ in ADVERBIAL_DEPS:
-            adverbial_ranges.add(subtree_char_span(tok))
-    for start_char, end_char in adverbial_ranges:
-        add_char_based_span(spans, start_char, end_char, "role-adverbial", mapping)
+    # Dependency-based adverbial spans are a fallback when constituency data is unavailable.
+    if not HAS_BENEPAR or BENE_PAR_WARNING:
+        adverbial_ranges = set()
+        for tok in sentence:
+            if tok.dep_ in ADVERBIAL_DEPS:
+                adverbial_ranges.add(subtree_char_span(tok))
+        for start_char, end_char in adverbial_ranges:
+            add_char_based_span(spans, start_char, end_char, "role-adverbial", mapping)
 
     for tok in sentence:
         if tok.dep_ == "appos":
@@ -769,6 +800,7 @@ def annotate_sentence(
         summary,
     )
     _add_fixed_phrases(sentence, mapping, spans, summary)
+    _prune_adverbial_spans(spans, sent_bounds)
 
     return spans, summary
 

+ 2 - 0
spacyback/requirements.txt

@@ -7,3 +7,5 @@ benepar>=0.2.0
 
 # ASGI server used by start.sh
 uvicorn[standard]>=0.29.0
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+

+ 1 - 3
spacyback/start.sh

@@ -1,4 +1,2 @@
 #!/bin/bash
-# Disable uvicorn access logs to avoid noise from automated probes
-# (e.g. /wp-includes/wlwmanifest.xml, /xmlrpc.php) hitting the service.
-nohup uvicorn mainspacy:app --host 0.0.0.0 --port 12012 --no-access-log &
+nohup uvicorn mainspacy:app --host 0.0.0.0 --port 12012 --reload --no-access-log &

+ 1 - 1
spacyback/style_config.py

@@ -167,7 +167,7 @@ STYLE_RULES: List[StyleRule] = [
         target="状语短语",
         description="深绿实线突出状语信息。",
         # css="background-color:#f6fef8",
-        css="border-bottom:1.5px dotted #c8f9d4",
+        css="border-bottom:1px dotted #1cbaca",
     ),
     StyleRule(
         selector=".verbal-infinitive",