|
|
@@ -4,6 +4,7 @@
|
|
|
import asyncio
|
|
|
import html
|
|
|
import json
|
|
|
+import os
|
|
|
import re
|
|
|
from collections import Counter
|
|
|
from dataclasses import dataclass, field
|
|
|
@@ -13,7 +14,6 @@ from typing import Any, Dict, List, Optional, Set, Tuple
|
|
|
from urllib import error as urllib_error, request as urllib_request
|
|
|
from urllib.parse import urlparse, urlunparse
|
|
|
|
|
|
-import benepar
|
|
|
import httpx
|
|
|
import spacy
|
|
|
from fastapi import FastAPI, HTTPException
|
|
|
@@ -47,7 +47,7 @@ def _load_spacy_pipeline(
|
|
|
except OSError:
|
|
|
try:
|
|
|
spacy_download(model_name)
|
|
|
- nlp = spacy.load(model_name)
|
|
|
+ nlp = spacy.load(model_name, disable=["tagger", "lemmatizer"])
|
|
|
except Exception as exc: # pragma: no cover - install helper
|
|
|
raise RuntimeError(
|
|
|
f"spaCy model '{model_name}' is required. Install via `python -m spacy download {model_name}`."
|
|
|
@@ -57,17 +57,34 @@ def _load_spacy_pipeline(
|
|
|
pipe_names = set(nlp.pipe_names)
|
|
|
if not ({"parser", "senter", "sentencizer"} & pipe_names):
|
|
|
try:
|
|
|
- nlp.add_pipe("sentencizer")
|
|
|
+ nlp.add_pipe("sentencizer", disable=["tagger", "lemmatizer"])
|
|
|
except Exception:
|
|
|
pass # if already present or unavailable, ignore
|
|
|
|
|
|
+ enable_benepar = os.getenv("ENABLE_BENEPAR", "0").strip().lower() in {
|
|
|
+ "1",
|
|
|
+ "true",
|
|
|
+ "yes",
|
|
|
+ "on",
|
|
|
+ }
|
|
|
+
|
|
|
+ if not enable_benepar:
|
|
|
+ BENE_PAR_WARNING = (
|
|
|
+ "Benepar is disabled by ENABLE_BENEPAR. Using dependency-based spans."
|
|
|
+ )
|
|
|
+ return nlp
|
|
|
+
|
|
|
# Try to add benepar
|
|
|
if "benepar" not in nlp.pipe_names:
|
|
|
try:
|
|
|
+ import benepar
|
|
|
+
|
|
|
nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
|
|
|
HAS_BENEPAR = True
|
|
|
except ValueError:
|
|
|
try:
|
|
|
+ import benepar
|
|
|
+
|
|
|
benepar.download(benepar_model)
|
|
|
nlp.add_pipe("benepar", config={"model": benepar_model}, last=True)
|
|
|
HAS_BENEPAR = True
|