Przeglądaj źródła

修改中间版本,效果不理想

Gogs 2 miesiące temu
rodzic
commit
3eac494bfb
2 zmienione pliki: 153 dodania i 31 usunięć
  1. 108 1
      spacyback/mainspacy.py
  2. 45 30
      spacyback/style_config.py

+ 108 - 1
spacyback/mainspacy.py

@@ -201,6 +201,70 @@ CLAUSE_FUNCTION_LABELS = {
     "RESULT": "结果",
     "PURPOSE": "目的",
 }
+
+
+def _iter_infinitive_markers(token: SpacyToken) -> List[SpacyToken]:
+    """Return the direct children of ``token`` that are infinitival "to" markers.
+
+    A child qualifies when its lowercased text is "to" and its fine-grained
+    tag is "TO". Children are returned in the iteration order of
+    ``token.children``; the list is empty when no child qualifies.
+    """
+    return [
+        kid
+        for kid in token.children
+        if kid.lower_ == "to" and kid.tag_ == "TO"
+    ]
+
+
+def _token_is_infinitive(token: SpacyToken) -> bool:
+    """Return True when ``token`` is a verb heading a "to"-infinitive.
+
+    Three conditions must all hold:
+
+    * the coarse part of speech is "VERB" or "AUX";
+    * the token is a base form, i.e. its "VerbForm" morphology contains
+      "Inf" or its fine-grained tag is "VB";
+    * at least one child is a "to" marker, as reported by
+      ``_iter_infinitive_markers``.
+    """
+    if token.pos_ not in ("VERB", "AUX"):
+        return False
+    is_base_form = "Inf" in token.morph.get("VerbForm") or token.tag_ == "VB"
+    # A base form without an attached "to" (e.g. after a modal) is not
+    # reported as an infinitive phrase.
+    return is_base_form and len(_iter_infinitive_markers(token)) > 0
+
+
+def _token_is_gerund(token: SpacyToken) -> bool:
+    """Return True when ``token`` is a verb in an -ing (gerund) form.
+
+    The token must be tagged "VERB" or "AUX"; it then matches when its
+    "VerbForm" morphology contains "Ger" or its fine-grained tag is "VBG".
+    """
+    if token.pos_ not in ("VERB", "AUX"):
+        return False
+    # NOTE(review): the Penn Treebank tag "VBG" also covers present
+    # participles ("is running"), so progressive forms presumably match
+    # here as well -- confirm that this is intended.
+    return "Ger" in token.morph.get("VerbForm") or token.tag_ == "VBG"
+
+
+def _annotate_nonfinite_verbals(
+    sentence: SpacySpan,
+    spans: List[Span],
+    mapping: Dict[int, int],
+) -> None:
+    """Highlight infinitive and gerund phrases to expose non-finite verbs.
+
+    The sentence is scanned twice: first for "to"-infinitives, then for
+    gerunds, so every "verbal-infinitive" span is registered before any
+    "verbal-gerund" span. Each span covers the character range returned by
+    ``subtree_char_span`` for the verb; for infinitives the start is moved
+    left if a "to" marker begins earlier.
+
+    Args:
+        sentence: Tokens to scan.
+        spans: Span collection handed to ``add_char_based_span``
+            (presumably extended in place -- confirm against that helper).
+        mapping: Passed through unchanged to ``add_char_based_span``.
+    """
+    # Pass 1: "to" + verb infinitive phrases.
+    for token in sentence:
+        if not _token_is_infinitive(token):
+            continue
+        start_char, end_char = subtree_char_span(token)
+        # _token_is_infinitive() only succeeds when at least one "to" marker
+        # is attached, so the generator below is never empty.
+        marker_start = min(
+            child.idx for child in _iter_infinitive_markers(token)
+        )
+        add_char_based_span(
+            spans,
+            min(start_char, marker_start),
+            end_char,
+            "verbal-infinitive",
+            mapping,
+            attrs={"data-form": "不定式"},
+        )
+    # Pass 2: gerund phrases. Iterating the sentence yields each token once,
+    # so no de-duplication bookkeeping is required.
+    for token in sentence:
+        if not _token_is_gerund(token):
+            continue
+        start_char, end_char = subtree_char_span(token)
+        add_char_based_span(
+            spans,
+            start_char,
+            end_char,
+            "verbal-gerund",
+            mapping,
+            attrs={"data-form": "动名词"},
+        )
+
+
 RESIDUAL_DEP_LABELS = {
     "det": "限定词",
     "prep": "介词",
@@ -472,6 +536,47 @@ def _predicate_span_bounds(head: SpacyToken) -> Tuple[int, int]:
     return start_char, end_char
 
 
+def _token_is_finite(token: SpacyToken) -> bool:
+    """Report whether ``token`` is a verb carrying finite morphology.
+
+    Only tokens whose coarse part of speech is "VERB" or "AUX" can qualify.
+    Such a token is finite when its "VerbForm" morphology contains "Fin" or
+    "Imp", when its fine-grained tag is listed in ``FINITE_VERB_TAGS``, or
+    when the tag is "MD".
+    """
+    if token.pos_ not in ("VERB", "AUX"):
+        return False
+    forms = token.morph.get("VerbForm")
+    # NOTE(review): Universal Dependencies encodes imperatives as Mood=Imp
+    # rather than VerbForm=Imp -- confirm that the "Imp" test can match.
+    return (
+        "Fin" in forms
+        or "Imp" in forms
+        or token.tag_ in FINITE_VERB_TAGS
+        or token.tag_ == "MD"
+    )
+
+
+def _has_finite_auxiliary(token: SpacyToken) -> bool:
+    """Return True if a direct child of ``token`` is a finite helper verb.
+
+    A child counts when its dependency label is "aux", "auxpass" or "cop"
+    and ``_token_is_finite`` accepts it.
+    """
+    return any(
+        helper.dep_ in {"aux", "auxpass", "cop"} and _token_is_finite(helper)
+        for helper in token.children
+    )
+
+
+def _is_finite_predicate_head(token: SpacyToken) -> bool:
+    """Decide whether ``token`` may serve as the head of a finite predicate.
+
+    Decision order:
+
+    1. A token that is itself finite (``_token_is_finite``) is accepted.
+    2. A base form ("VerbForm" contains "Inf", or the tag is "VB") is
+       rejected when a "to" marker is attached; accepted when it has a
+       finite auxiliary (modal or do-support, e.g. "can go",
+       "did not go"); otherwise accepted only as the sentence ROOT.
+    3. A participle or gerund ("VerbForm" contains "Part"/"Ger", or the
+       tag is in ``NONFINITE_VERB_TAGS``) is accepted only with a finite
+       auxiliary.
+    4. Anything else is rejected.
+    """
+    if _token_is_finite(token):
+        return True
+    verb_forms = set(token.morph.get("VerbForm"))
+    # Base forms are handled in one place. Rejecting every "Inf" token
+    # outright would drop the lexical verb of modal and do-support
+    # predicates, and would make the ROOT rule unreachable whenever the
+    # morphology marks the base form as "Inf".
+    if "Inf" in verb_forms or token.tag_ == "VB":
+        # Recognise "to" by tag as well as by dependency label, matching
+        # the test used by _iter_infinitive_markers().
+        has_to_marker = any(
+            child.lower_ == "to"
+            and (child.tag_ == "TO" or child.dep_ == "mark")
+            for child in token.children
+        )
+        if has_to_marker:
+            return False
+        if _has_finite_auxiliary(token):
+            return True
+        return token.dep_ == "ROOT"
+    if verb_forms & {"Part", "Ger"} or token.tag_ in NONFINITE_VERB_TAGS:
+        return _has_finite_auxiliary(token)
+    return False
+
+
 def _predicate_heads(sentence: SpacySpan) -> List[SpacyToken]:
     """Collect predicate heads including coordinated verbs."""
     candidates: List[SpacyToken] = []
@@ -492,7 +597,8 @@ def _predicate_heads(sentence: SpacySpan) -> List[SpacyToken]:
         if tok.i in seen:
             continue
         seen.add(tok.i)
-        ordered.append(tok)
+        if _is_finite_predicate_head(tok):
+            ordered.append(tok)
     return ordered
 
 
@@ -622,6 +728,7 @@ def annotate_sentence(
             if span and any(tok.tag_ == "VBG" for tok in span):
                 add_span(spans, first_comma, second_comma + 1, "role-absolute")
 
+    _annotate_nonfinite_verbals(sentence, spans, mapping)
     annotate_constituents(
         sentence,
         spans,

+ 45 - 30
spacyback/style_config.py

@@ -120,41 +120,43 @@ STYLE_RULES: List[StyleRule] = [
     #     description="复杂句底部加淡橙色阴影,以提示结构较复杂。",
     #     css="box-shadow:inset 0 -0.2rem 0 rgba(250,209,155,.6)",
     # ),
-    StyleRule(
-        selector=".analysis[data-helper='on'] .sentence-scope::after",
-        target="句子辅助说明",
-        description="在句后输出中文提示,解释成分与从句情况。",
-        css="content:attr(data-note);display:block;font-size:.85rem;color:#64748b;margin:.2rem 0 .45rem 1.5rem;line-height:1.4",
-    ),
-    StyleRule(
-        selector=".analysis[data-helper='off'] .sentence-scope::after",
-        target="关闭辅助说明",
-        description="当 helper 关闭时隐藏说明,避免额外占位。",
-        css="content:'';display:none",
-    ),
+    # StyleRule(
+    #     selector=".analysis[data-helper='on'] .sentence-scope::after",
+    #     target="句子辅助说明",
+    #     description="在句后输出中文提示,解释成分与从句情况。",
+    #     css="content:attr(data-note);display:block;font-size:.85rem;color:#64748b;margin:.2rem 0 .45rem 1.5rem;line-height:1.4",
+    # ),
+    # StyleRule(
+    #     selector=".analysis[data-helper='off'] .sentence-scope::after",
+    #     target="关闭辅助说明",
+    #     description="当 helper 关闭时隐藏说明,避免额外占位。",
+    #     css="content:'';display:none",
+    # ),
     StyleRule(
         selector=".role-subject",
         target="主语",
         description="淡黄色底纹突出主语位置。",
-        css="background-color:#fff3bf",
+        css="background-color:#fcfee1",
     ),
     StyleRule(
         selector=".role-predicate",
         target="谓语动词",
-        description="深玫红字体加粗,强调谓语中心。",
-        css="color:#000000!important;font-weight:700;background-color:rgba(255,235,239,.8)",
+        description="字体加粗,强调谓语中心。",
+        css="color:#000000!important;font-weight:700;",
     ),
     StyleRule(
         selector=".role-object-do",
         target="直接宾语",
         description="浅绿底色显示直接宾语。",
-        css="background-color:#e5ffcc",
+        # css="background-color:#e5ffcc",
+        css ="border-bottom:2px solid #e5ffcc; color:#2a5700"
     ),
     StyleRule(
         selector=".role-object-io",
         target="间接宾语",
         description="黄绿底色区分间接宾语。",
-        css="background-color:#cef0a3",
+        # css="background-color:#cef0a3",
+        css ="border-bottom:2px solid #120d4a; color:#120d4a"
     ),
     StyleRule(
         selector=".role-complement",
@@ -178,7 +180,20 @@ STYLE_RULES: List[StyleRule] = [
         selector=".role-adverbial",
         target="状语短语",
         description="黄绿底色突出状语信息。",
-        css="background-color:#e7fded",
+        # css="background-color:#f6fef8",
+        css="border-bottom:2px solid #f6fef8",
+    ),
+    StyleRule(
+        selector=".verbal-infinitive",
+        target="不定式结构",
+        description="虚线下划线提示 to+动词的不定式短语。",
+        css="border-bottom:2px dashed #c084fc;color:#581c87",
+    ),
+    StyleRule(
+        selector=".verbal-gerund",
+        target="动名词结构",
+        description="淡紫底纹提示 V-ing 充当名词的结构。",
+        css="border-bottom:2px dashed #c084fc;color:#581c87",
     ),
     StyleRule(
         selector=".role-connector",
@@ -216,12 +231,12 @@ STYLE_RULES: List[StyleRule] = [
     #     description="统一使用彩色立线和左内边距包裹从句内容。",
     #     css="border-left:2px solid currentColor;padding-left:.25rem;margin-left:.1rem",
     # ),
-    # StyleRule(
-    #     selector=".clause-noun",
-    #     target="名词从句",
-    #     description="绿色配色突出名词性从句。",
-    #     css="color:#5c8f1d;background-color:rgba(158,201,134,.18)",
-    # ),
+    StyleRule(
+        selector=".clause-noun",
+        target="名词从句",
+        description="绿色配色突出名词性从句。",
+        css="color:#5c8f1d;background-color:rgba(158,201,134,.18)",
+    ),
     StyleRule(
         selector=".clause-relative",
         target="定语从句",
@@ -272,12 +287,12 @@ STYLE_RULES: List[StyleRule] = [
     #     description="浅灰背景提示未归类成分,并通过 data-role 提供中文标签。",
     #     css="background-color:#f6f8fa;color:#475569;border-bottom:1px dotted #cbd5e1",
     # ),
-    StyleRule(
-        selector=".lex-rare",
-        target="低频词",
-        description="深蓝色字体提示低频或重点词汇。",
-        css="color:#000080",
-    ),
+    # StyleRule(
+    #     selector=".lex-rare",
+    #     target="低频词",
+    #     description="深蓝色字体提示低频或重点词汇。",
+    #     css="color:#000080",
+    # ),
 ]
 
 STYLE_BLOCK = build_style_block(STYLE_RULES)