Ver Fonte

优化内存

Gogs há 1 semana atrás
pai
commit
536cd9c300
3 ficheiros alterados com 34 adições e 4 exclusões
  1. 20 3
      spacyback/mainspacy.py
  2. 13 1
      spacyback/start.sh
  3. 1 0
      spacyback/start_mem.sh

+ 20 - 3
spacyback/mainspacy.py

@@ -4,6 +4,7 @@
 import asyncio
 import asyncio
 import html
 import html
 import json
 import json
+import os
 import re
 import re
 from collections import Counter
 from collections import Counter
 from dataclasses import dataclass, field
 from dataclasses import dataclass, field
@@ -13,7 +14,6 @@ from typing import Any, Dict, List, Optional, Set, Tuple
 from urllib import error as urllib_error, request as urllib_request
 from urllib import error as urllib_error, request as urllib_request
 from urllib.parse import urlparse, urlunparse
 from urllib.parse import urlparse, urlunparse
 
 
-import benepar
 import httpx
 import httpx
 import spacy
 import spacy
 from fastapi import FastAPI, HTTPException
 from fastapi import FastAPI, HTTPException
@@ -47,7 +47,7 @@ def _load_spacy_pipeline(
     except OSError:
     except OSError:
         try:
         try:
             spacy_download(model_name)
             spacy_download(model_name)
-            nlp = spacy.load(model_name)
+            nlp = spacy.load(model_name, disable=["tagger", "lemmatizer"])
         except Exception as exc:  # pragma: no cover - install helper
         except Exception as exc:  # pragma: no cover - install helper
             raise RuntimeError(
             raise RuntimeError(
                 f"spaCy model '{model_name}' is required. Install via `python -m spacy download {model_name}`."
                 f"spaCy model '{model_name}' is required. Install via `python -m spacy download {model_name}`."
@@ -57,17 +57,34 @@ def _load_spacy_pipeline(
     pipe_names = set(nlp.pipe_names)
     pipe_names = set(nlp.pipe_names)
     if not ({"parser", "senter", "sentencizer"} & pipe_names):
     if not ({"parser", "senter", "sentencizer"} & pipe_names):
         try:
         try:
-            nlp.add_pipe("sentencizer")
+            nlp.add_pipe("sentencizer", disable=["tagger", "lemmatizer"])
         except Exception:
         except Exception:
             pass  # if already present or unavailable, ignore
             pass  # if already present or unavailable, ignore
 
 
+    enable_benepar = os.getenv("ENABLE_BENEPAR", "0").strip().lower() in {
+        "1",
+        "true",
+        "yes",
+        "on",
+    }
+
+    if not enable_benepar:
+        BENE_PAR_WARNING = (
+            "Benepar is disabled by ENABLE_BENEPAR. Using dependency-based spans."
+        )
+        return nlp
+
     # Try to add benepar
     # Try to add benepar
     if "benepar" not in nlp.pipe_names:
     if "benepar" not in nlp.pipe_names:
         try:
         try:
+            import benepar
+
             nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
             nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
             HAS_BENEPAR = True
             HAS_BENEPAR = True
         except ValueError:
         except ValueError:
             try:
             try:
+                import benepar
+
                 benepar.download(benepar_model)
                 benepar.download(benepar_model)
                 nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
                 nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
                 HAS_BENEPAR = True
                 HAS_BENEPAR = True

+ 13 - 1
spacyback/start.sh

@@ -1,2 +1,14 @@
 #!/bin/bash
 #!/bin/bash
-nohup uvicorn mainspacy:app --host 0.0.0.0 --port 12012 --reload --no-access-log &
+set -euo pipefail
+
+cd "$(dirname "$0")"
+
+# Stop any running mainspacy uvicorn process on port 12012 (whether it was started with or without --reload) so only one instance holds the port.
+pkill -f "uvicorn mainspacy:app .*--port 12012" 2>/dev/null || true
+sleep 1
+
+# Default to low-memory mode. Set ENABLE_BENEPAR=1 to re-enable the benepar constituency parser.
+ENABLE_BENEPAR="${ENABLE_BENEPAR:-0}" \
+setsid /root/miniconda3/envs/py311/bin/uvicorn mainspacy:app --host 0.0.0.0 --port 12012 --no-access-log > nohup.out 2>&1 < /dev/null &
+
+echo "started mainspacy pid=$!"

+ 1 - 0
spacyback/start_mem.sh

@@ -0,0 +1 @@
+ENABLE_BENEPAR=1 bash /home/myproc/chrome_grammarly/spacyback/start.sh