| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253 |
- from fastapi import FastAPI, Request, File, UploadFile, HTTPException, Form, Response
- from fastapi.middleware.cors import CORSMiddleware
- from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
- from fastapi.staticfiles import StaticFiles
- import os
- import shutil
- import uuid
- from pydantic import BaseModel
- import hashlib
- import asyncio
- from typing import AsyncGenerator
- import soundfile as sf
- import io
- import logging
- import numpy as np
- import re
- from kokoro import KPipeline # 假设 kokoro 已安装并可用
# Logging: INFO-level root configuration plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The FastAPI application that every route in this module is registered on.
app = FastAPI()

# CORS: any origin, method and header is accepted, with credentials enabled.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive -- confirm this is intended before exposing the service publicly.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Directory that stores uploaded PDFs; it is also served under /static/files.
UPLOAD_DIRECTORY = "static/files"
# exist_ok=True creates the directory without a separate existence check, so
# there is no window between "check" and "create" for another process to race.
os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)

# Static file mounts. The two more specific prefixes are registered before the
# general "/static" mount.
app.mount("/static/files", StaticFiles(directory=UPLOAD_DIRECTORY), name="static_files")
app.mount("/static/web", StaticFiles(directory="static/web"), name="static_web")
app.mount("/static", StaticFiles(directory="static"), name="static")

# Directory holding generated audio files so repeated requests can be served
# from disk.
CACHE_DIR = "audio_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
# Root path: redirect the browser to the PDF viewer, opened on the default
# document /static/files/compress.pdf.
@app.get("/")
def root():
    viewer_url = "/static/web/viewer.html?file=/static/files/compress.pdf"
    return RedirectResponse(url=viewer_url)
# Sanitise a file name.
def sanitize_filename(name: str) -> str:
    """Return *name* reduced to characters that are safe in a file name.

    Alphanumeric characters (as defined by ``str.isalnum``) and the characters
    space, ``.``, ``_`` and ``-`` are kept; everything else, including path
    separators, is dropped. Trailing whitespace is removed from the result,
    which may therefore be empty.
    """
    permitted_punctuation = " ._-"
    kept = [ch for ch in name if ch.isalnum() or ch in permitted_punctuation]
    return "".join(kept).rstrip()
# PDF upload endpoint.
#
# Stores the uploaded file in UPLOAD_DIRECTORY as "<sanitised custom_name>.pdf".
#
# Form fields:
#   file        - the upload; its content type must be application/pdf.
#   custom_name - desired file name (without extension); it is sanitised first.
#
# Responses:
#   200 - {"success": True, "file_path": "/static/files/<name>.pdf"}
#   400 - wrong content type, empty name after sanitising, or name already taken.
#   500 - the file could not be written.
@app.post("/upload-pdf")
async def upload_pdf(file: UploadFile = File(...), custom_name: str = Form(...)):
    if file.content_type != 'application/pdf':
        raise HTTPException(status_code=400, detail="文件类型必须是 PDF")

    sanitized_name = sanitize_filename(custom_name)
    if not sanitized_name:
        return JSONResponse(status_code=400, content={"success": False, "error": "无效的文件名"})

    unique_filename = f"{sanitized_name}.pdf"
    file_path = os.path.join(UPLOAD_DIRECTORY, unique_filename)

    created = False
    try:
        # Mode "xb" creates the file exclusively: if the name is already taken
        # the open fails instead of overwriting, so two concurrent uploads with
        # the same name cannot clobber each other.
        with open(file_path, "xb") as buffer:
            created = True
            shutil.copyfileobj(file.file, buffer)
    except FileExistsError:
        return JSONResponse(status_code=400, content={"success": False, "error": "文件名已存在,请使用其他名称"})
    except Exception as e:
        logger.exception("上传过程中出错")
        if created:
            # Do not leave a truncated PDF behind; it would block the name and
            # be served as if it were a valid upload.
            try:
                os.remove(file_path)
            except OSError:
                pass
        raise HTTPException(status_code=500, detail="上传过程中出错") from e
    finally:
        file.file.close()

    file_relative_path = f"/static/files/{unique_filename}"
    return JSONResponse(content={"success": True, "file_path": file_relative_path})
# PDF listing endpoint.
#
# Returns {"success": True, "files": [{"name": ..., "url": ...}, ...]} with one
# entry per directory entry in UPLOAD_DIRECTORY whose name ends in ".pdf"
# (case-insensitive), sorted by name. Responds with 500 when the directory
# cannot be read.
@app.get("/list-pdfs")
async def list_pdfs():
    try:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # listing stable between calls.
        entries = sorted(os.listdir(UPLOAD_DIRECTORY))
    except OSError as e:
        # Keep the underlying cause in the log; the client only gets the
        # generic message.
        logger.exception("无法获取文件列表")
        raise HTTPException(status_code=500, detail="无法获取文件列表") from e

    pdf_files = [
        {"name": entry, "url": f"/static/files/{entry}"}
        for entry in entries
        if entry.lower().endswith(".pdf")
    ]
    return JSONResponse(content={"success": True, "files": pdf_files})
# TTS service class.
class TextToSpeechServer:
    """Holder for the ``KPipeline`` instance used to synthesise speech."""

    def __init__(self):
        # Stays None until load_model() has succeeded.
        self.pipeline = None

    def load_model(self, lang_code='a'):
        """Create the ``KPipeline`` for *lang_code* and store it on the instance.

        Any exception raised while loading is logged and then re-raised
        unchanged.
        """
        try:
            logger.info("加载 KPipeline 模型...")
            loaded = KPipeline(lang_code=lang_code)
            self.pipeline = loaded
            logger.info("模型加载成功")
        except Exception as e:
            logger.error(f"模型加载失败: {str(e)}")
            raise
# Module-level TTS service instance; its pipeline is None until the model has
# been loaded.
tts_server = TextToSpeechServer()


# Startup hook: load the Kokoro model, with the default language code, when
# the application starts.
@app.on_event("startup")
async def startup_event():
    tts_server.load_model()
# Request model: the body accepted by the speech endpoints.
class TextToSpeechRequest(BaseModel):
    # Text to convert to speech.
    user_input: str
    # Voice name handed to the pipeline; this is the default voice.
    voice: str = 'af_heart'
    # Speed value handed to the pipeline; this is the default speed.
    speed: float = 1.0
# Text-to-speech endpoint.
#
# Synthesises the whole request text into one WAV payload (24000 Hz) and
# returns it with media type audio/wav. Results are cached on disk in
# CACHE_DIR and reused for identical (voice, speed, text) requests.
#
# The audio is built completely before the response is created, so every
# failure can still be reported with a proper HTTP status code:
#   400 - the text is empty after stripping whitespace.
#   503 - the TTS pipeline has not been loaded.
#   500 - synthesis failed; the detail carries the error message.
#
# NOTE: synthesis is called synchronously, so the event loop is blocked while
# it runs.
@app.post("/text-to-speech/")
async def text_to_speech(request: TextToSpeechRequest):
    user_input = request.user_input.strip()
    if not user_input:
        raise HTTPException(status_code=400, detail="输入文本为空")

    # Voice and speed are hashed together with the text; keying on the text
    # alone would serve audio synthesised with different settings.
    cache_key = f"{request.voice}\x00{request.speed}\x00{user_input}"
    text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
    audio_path = os.path.join(CACHE_DIR, f"{text_hash}.wav")

    try:
        with open(audio_path, "rb") as f:
            return Response(content=f.read(), media_type="audio/wav")
    except FileNotFoundError:
        pass  # Cache miss: synthesise below.

    if not tts_server.pipeline:
        raise HTTPException(status_code=503, detail="模型未初始化")

    try:
        generator = tts_server.pipeline(
            text=user_input,
            voice=request.voice,
            speed=request.speed,
            split_pattern=r'\n+'
        )
        # Each item unpacks to three values; only the audio is needed here.
        segments = [audio for _gs, _ps, audio in generator]
        if not segments:
            # np.concatenate raises on an empty list; fail with a clear message.
            raise ValueError("未生成任何音频")
        buffer = io.BytesIO()
        sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
        audio_data = buffer.getvalue()
    except Exception as e:
        logger.error(f"TTS 错误: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Write to a temporary name and rename into place so a concurrent request
    # never reads a half-written cache file. A cache failure is logged but does
    # not fail the request: the audio is already available.
    temp_path = f"{audio_path}.{uuid.uuid4().hex}.tmp"
    try:
        with open(temp_path, "wb") as f:
            f.write(audio_data)
        os.replace(temp_path, audio_path)
    except OSError as e:
        logger.warning(f"音频缓存写入失败: {str(e)}")
        try:
            os.remove(temp_path)
        except OSError:
            pass

    return Response(content=audio_data, media_type="audio/wav")
# Split text into sentences.
def split_text_into_sentences(text: str) -> list:
    """Split *text* into sentences and return them as a list of strings.

    A boundary is any run of whitespace that directly follows ``.``, ``!`` or
    ``?``; the punctuation stays attached to the preceding sentence. Each piece
    is stripped and empty pieces are discarded, so blank input yields ``[]``.
    """
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    # filter(None, ...) drops the empty strings left after stripping.
    return list(filter(None, map(str.strip, pieces)))
# Generate the audio for a single sentence.
async def generate_kokoro_audio(chunk: str, voice: str, speed: float) -> AsyncGenerator[bytes, None]:
    """Yield the WAV bytes (24000 Hz) for *chunk*, using the on-disk cache.

    At most one item is yielded: a complete WAV file holding the first audio
    segment the pipeline produces for *chunk*. Any further segments are
    dropped. Nothing is yielded, and nothing is cached, when the pipeline
    produces no segment at all.

    Args:
        chunk: Text to synthesise.
        voice: Voice name passed to the pipeline.
        speed: Speed value passed to the pipeline.

    Raises:
        HTTPException: 503 when the pipeline is not loaded, 500 when synthesis
            fails.
    """
    # Voice and speed are part of the key so audio made with other settings is
    # never reused. The "first-segment" tag keeps these possibly truncated
    # entries apart from caches keyed on the text alone.
    cache_key = f"first-segment\x00{voice}\x00{speed}\x00{chunk}"
    text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
    audio_path = os.path.join(CACHE_DIR, f"{text_hash}.wav")

    try:
        # Read fully and close the file before yielding, so the handle is not
        # held open while the consumer processes the data.
        with open(audio_path, "rb") as f:
            cached_audio = f.read()
    except FileNotFoundError:
        cached_audio = None
    if cached_audio is not None:
        yield cached_audio
        return

    # Raised outside the try block below so it is not rewrapped as a 500.
    if not tts_server.pipeline:
        raise HTTPException(status_code=503, detail="模型未初始化")

    try:
        generator = tts_server.pipeline(
            text=chunk,
            voice=voice,
            speed=speed,
            split_pattern=r'\n+'
        )
        # Only the first segment is used.
        first_result = next(iter(generator), None)
        if first_result is None:
            return
        _gs, _ps, audio = first_result
        buffer = io.BytesIO()
        sf.write(buffer, audio, 24000, format='WAV')
        audio_data = buffer.getvalue()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS生成失败: {str(e)}") from e

    # Cache before yielding so the entry is stored even if the consumer stops
    # iterating. Write-then-rename keeps readers from seeing a partial file; a
    # cache failure is logged and otherwise ignored.
    temp_path = f"{audio_path}.{uuid.uuid4().hex}.tmp"
    try:
        with open(temp_path, "wb") as f:
            f.write(audio_data)
        os.replace(temp_path, audio_path)
    except OSError as e:
        logger.warning(f"音频缓存写入失败: {str(e)}")
        try:
            os.remove(temp_path)
        except OSError:
            pass

    yield audio_data
# Page-to-speech endpoint: converts the text sentence by sentence and streams
# each sentence's audio as soon as it is ready.
#
# The response body, and the cached "<hash>_full.wav" file, is the sequence of
# chunks produced for the individual sentences, written back to back.
# NOTE(review): if each per-sentence chunk is a complete WAV file, the combined
# stream is not a single well-formed WAV -- confirm the client expects this.
#
# Responses:
#   400 - the text is empty, or contains no sentence.
#   503 - the TTS pipeline has not been loaded.
@app.post("/page-to-speech/")
async def page_to_speech(request: TextToSpeechRequest):
    user_input = request.user_input.strip()
    if not user_input:
        raise HTTPException(status_code=400, detail="输入文本为空")

    # Voice and speed are hashed together with the text; keying on the text
    # alone would replay audio synthesised with different settings.
    cache_key = f"{request.voice}\x00{request.speed}\x00{user_input}"
    full_text_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
    full_audio_path = os.path.join(CACHE_DIR, f"{full_text_hash}_full.wav")

    if os.path.exists(full_audio_path):
        def cached_chunks():
            # The context manager closes the file when streaming finishes or
            # the generator is closed early; fixed-size reads avoid splitting
            # binary data on newline bytes.
            with open(full_audio_path, "rb") as cached_file:
                yield from iter(lambda: cached_file.read(64 * 1024), b"")

        return StreamingResponse(cached_chunks(), media_type="audio/wav")

    sentences = split_text_into_sentences(user_input)
    if not sentences:
        raise HTTPException(status_code=400, detail="没有有效的句子")

    # Checked before streaming starts: once the response has begun, an error
    # can no longer be turned into an HTTP status code.
    if not tts_server.pipeline:
        raise HTTPException(status_code=503, detail="模型未初始化")

    async def audio_generator() -> AsyncGenerator[bytes, None]:
        streamed_parts = []  # everything sent so far, kept for the full cache
        for sentence in sentences:
            logger.info(f"处理句子: {sentence}")
            async for audio_data in generate_kokoro_audio(sentence, request.voice, request.speed):
                yield audio_data  # stream this sentence's audio immediately
                streamed_parts.append(audio_data)
                await asyncio.sleep(0)  # hand control back to the event loop

        # Store the complete stream. Write-then-rename keeps a concurrent
        # request from replaying a partially written file; a cache failure is
        # logged and otherwise ignored because the audio was already sent.
        temp_path = f"{full_audio_path}.{uuid.uuid4().hex}.tmp"
        try:
            with open(temp_path, "wb") as f:
                f.write(b"".join(streamed_parts))
            os.replace(temp_path, full_audio_path)
        except OSError as e:
            logger.warning(f"音频缓存写入失败: {str(e)}")
            try:
                os.remove(temp_path)
            except OSError:
                pass

    return StreamingResponse(audio_generator(), media_type="audio/wav")
# Health check: reports "healthy" once the TTS pipeline is set, otherwise
# "model_not_loaded".
@app.get("/health")
async def health_check():
    if tts_server.pipeline:
        status = "healthy"
    else:
        status = "model_not_loaded"
    return {"status": status}
# Script entry point: serve the app with uvicorn on all interfaces, port 8005.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8005)
|