This commit is contained in:
2026-05-24 01:16:07 +08:00
commit 2ece5174a7
35 changed files with 2583 additions and 0 deletions

0
backend/app/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,54 @@
import json
import httpx
class LLMClient:
    """Unified LLM client for OpenAI-compatible and native Anthropic HTTP APIs.

    Any provider other than ``"anthropic"`` is treated as OpenAI-compatible
    and served through ``/v1/chat/completions``.

    Args:
        provider: Provider name; compared case-insensitively.
        api_key: Secret sent with every request.
        base_url: API root, given with or without a trailing ``/v1`` and with
            or without a trailing slash.
        model: Model identifier forwarded unchanged to the provider.
    """

    # Per-request timeout in seconds for completion calls.
    _TIMEOUT = 90

    def __init__(self, provider: str, api_key: str, base_url: str, model: str):
        self.provider = provider.lower()
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.model = model

    def _endpoint(self, path: str) -> str:
        """Return the full URL of *path* below the ``/v1`` API root.

        Some OpenAI-compatible providers publish a base URL that already ends
        in ``/v1``; appending the version segment again would yield
        ``/v1/v1/...``, so it is only added when missing.
        """
        root = self.base_url
        if not root.endswith("/v1"):
            root = f"{root}/v1"
        return f"{root}/{path.lstrip('/')}"

    async def complete(self, system_prompt: str, user_prompt: str) -> str:
        """Send one system + user exchange and return the model's text reply.

        Raises:
            httpx.HTTPStatusError: If the provider answers with a 4xx/5xx status.
        """
        if self.provider == "anthropic":
            return await self._call_anthropic(system_prompt, user_prompt)
        return await self._call_openai_compat(system_prompt, user_prompt)

    async def _call_openai_compat(self, system_prompt: str, user_prompt: str) -> str:
        """Call an OpenAI-compatible ``/v1/chat/completions`` endpoint."""
        async with httpx.AsyncClient(timeout=self._TIMEOUT) as client:
            resp = await client.post(
                self._endpoint("chat/completions"),
                headers={"Authorization": f"Bearer {self.api_key}"},
                json={
                    "model": self.model,
                    "temperature": 0.2,
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ],
                },
            )
            resp.raise_for_status()
            return resp.json()["choices"][0]["message"]["content"]

    async def _call_anthropic(self, system_prompt: str, user_prompt: str) -> str:
        """Call the native Anthropic ``/v1/messages`` endpoint."""
        async with httpx.AsyncClient(timeout=self._TIMEOUT) as client:
            resp = await client.post(
                self._endpoint("messages"),
                headers={
                    "x-api-key": self.api_key,
                    "anthropic-version": "2023-06-01",
                    "content-type": "application/json",
                },
                json={
                    "model": self.model,
                    "max_tokens": 2048,
                    "system": system_prompt,
                    "messages": [{"role": "user", "content": user_prompt}],
                },
            )
            resp.raise_for_status()
            return resp.json()["content"][0]["text"]

189
backend/app/ai/processor.py Normal file
View File

@@ -0,0 +1,189 @@
import json
import logging
from datetime import datetime, date
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from ..models.news import RawNews, ProcessedNews, LLMConfig, NewsSource, SystemLog
from ..crawler.rss_fetcher import fetch_rss
from .llm_client import LLMClient
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# System message sent with every analysis request; it sets the analyst persona.
SYSTEM_PROMPT = "你是医药行业资深分析师,擅长解读全球医药政策、临床研究、行业动态。"
# User-message template for per-article analysis. It is filled through
# ``str.format`` with ``title``, ``content`` and ``language``; the doubled
# braces are literal braces of the JSON example the model is asked to mirror.
ANALYSIS_PROMPT = """分析以下新闻,返回严格的 JSON 格式结果,不要包含任何其他文字。
新闻标题:{title}
新闻内容:{content}
新闻语言:{language}
返回格式:
{{
"is_medical_related": true,
"title_zh": "中文标题(英文原文请翻译成简洁中文)",
"summary": "中文摘要100-150字客观陈述核心内容",
"opinion": "核心观点或行业影响50-100字分析性语言点明实际意义",
"keywords": ["关键词1", "关键词2", "关键词3", "关键词4", "关键词5"],
"importance_score": 8.5,
"importance_reason": "评分理由30字内",
"category": "药品监管"
}}
category 只能是以下四个之一:药品监管 / 临床研究 / 行业动态 / 政策法规
importance_score 评分标准1-10
9-10重大监管决定 / 突破性研究 / 影响整个行业的政策
7-8 :行业重要动态,有明显商业或学术价值
5-6 :常规行业新闻,有一定参考价值
1-4 :普通资讯,信息价值有限
"""
async def _log(db: AsyncSession, level: str, event_type: str, message: str):
    """Persist one ``SystemLog`` row and commit the session immediately."""
    entry = SystemLog(level=level, event_type=event_type, message=message)
    db.add(entry)
    await db.commit()
async def _get_active_llm(db: AsyncSession) -> LLMConfig | None:
    """Return one LLM configuration flagged active, or ``None`` if there is none."""
    active_query = select(LLMConfig).where(LLMConfig.is_active == True).limit(1)  # noqa: E712
    rows = await db.execute(active_query)
    return rows.scalar_one_or_none()
def _extract_json_object(text: str) -> dict | None:
    """Return the first JSON object embedded in *text*, or ``None``.

    Decoding starts at the first ``{`` so Markdown code fences, a leading
    ``json`` tag or explanatory prose around the object are all tolerated.
    Anything that does not decode to a ``dict`` (arrays, scalars, broken JSON)
    yields ``None``.
    """
    start = text.find("{")
    if start == -1:
        return None
    try:
        parsed, _ = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


async def _analyze_article(client: LLMClient, title: str, content: str, language: str) -> dict | None:
    """Ask the LLM to analyse one article and return the parsed JSON object.

    Args:
        client: LLM client used for the completion call.
        title: Article title.
        content: Article body; only the first 2000 characters are sent.
        language: Source language code; ``"zh"`` is labelled Chinese, anything
            else English.

    Returns:
        The decoded analysis ``dict``, or ``None`` when the request fails or
        the reply contains no JSON object. Callers can therefore rely on
        ``.get`` being available on any non-``None`` result.
    """
    prompt = ANALYSIS_PROMPT.format(
        title=title,
        content=content[:2000] if content else "(无正文)",
        language="中文" if language == "zh" else "英文",
    )
    try:
        raw = await client.complete(SYSTEM_PROMPT, prompt)
    except Exception as e:
        # Best-effort boundary: one failing request must not abort the batch.
        logger.warning(f"LLM request error: {e}")
        return None

    analysis = _extract_json_object(raw) if isinstance(raw, str) else None
    if analysis is None:
        logger.warning("LLM parse error: reply contains no JSON object")
    return analysis
# Categories that are each guaranteed one featured slot when they have news.
_FEATURED_CATEGORIES = ("药品监管", "临床研究", "行业动态", "政策法规")


def _pick_featured(ranked_news, categories=_FEATURED_CATEGORIES, limit=10):
    """Choose up to *limit* items from *ranked_news* with category diversity.

    *ranked_news* must already be ordered by descending ``importance_score``.
    The best item of every category is taken first, the remaining slots are
    filled by score, and the final selection is ordered by score so that the
    position in the returned list reflects importance.
    """
    chosen = []
    chosen_ids = set()  # identity set: O(1) membership without relying on __eq__

    def take(item):
        chosen.append(item)
        chosen_ids.add(id(item))

    # First pass: one guaranteed pick per category.
    for cat in categories:
        if len(chosen) >= limit:
            break
        best = next((n for n in ranked_news if n.category == cat), None)
        if best is not None and id(best) not in chosen_ids:
            take(best)

    # Second pass: fill the remaining slots by score.
    for n in ranked_news:
        if len(chosen) >= limit:
            break
        if id(n) not in chosen_ids:
            take(n)

    # Stable sort: equal scores keep their selection order.
    chosen.sort(key=lambda n: n.importance_score, reverse=True)
    return chosen


async def _select_top_10(db: AsyncSession, target: date):
    """Reset featured flags for *target* and elect its TOP 10.

    Loads every ``ProcessedNews`` row whose ``processed_at`` date equals
    *target*, clears ``is_featured`` / ``featured_rank`` on all of them, then
    marks the selection from ``_pick_featured`` with ranks starting at 1.

    Returns:
        The number of featured items.
    """
    result = await db.execute(
        select(ProcessedNews)
        .where(func.date(ProcessedNews.processed_at) == target)
        .order_by(ProcessedNews.importance_score.desc())
    )
    all_news = result.scalars().all()

    # Reset any previous election for that day.
    for n in all_news:
        n.is_featured = False
        n.featured_rank = None

    selected = _pick_featured(all_news)
    for rank, n in enumerate(selected, start=1):
        n.is_featured = True
        n.featured_rank = rank

    await db.commit()
    return len(selected)
# Values the analysis prompt allows for ``category``.
_ALLOWED_CATEGORIES = frozenset({"药品监管", "临床研究", "行业动态", "政策法规"})
_DEFAULT_CATEGORY = "行业动态"
_DEFAULT_SCORE = 5.0


def _coerce_score(value: object) -> float:
    """Turn an LLM-provided score into a float clamped to the 1–10 scale.

    Missing, non-numeric or NaN values fall back to the neutral default so a
    single malformed reply cannot abort the whole batch.
    """
    try:
        score = float(value)
    except (TypeError, ValueError):
        return _DEFAULT_SCORE
    if score != score:  # NaN is the only value unequal to itself
        return _DEFAULT_SCORE
    return min(10.0, max(1.0, score))


def _coerce_keywords(value: object) -> list[str]:
    """Normalise an LLM-provided keyword field to a list of non-blank strings."""
    if isinstance(value, str):
        return [value] if value.strip() else []
    if isinstance(value, (list, tuple)):
        return [str(k) for k in value if k is not None and str(k).strip()]
    return []


async def run_daily_pipeline(db: AsyncSession):
    """Run the daily pipeline: crawl active sources, analyse, elect the TOP 10.

    Steps:
        1. Fetch every active source and store unseen URLs as ``RawNews``.
        2. Send up to 120 pending articles to the active LLM; medical articles
           become ``ProcessedNews``, others are marked ``skipped`` and failed
           analyses ``error``. Progress is committed per article.
        3. Elect the featured TOP 10 for the current UTC date.

    Progress and failures are recorded through ``_log``; the function returns
    early when no active LLM configuration exists.
    """
    # Imported locally so this block does not depend on a file-level import.
    from sqlalchemy.orm import selectinload

    await _log(db, "INFO", "pipeline_start", "每日流水线启动")
    llm_cfg = await _get_active_llm(db)
    if not llm_cfg:
        await _log(db, "ERROR", "pipeline_error", "未找到激活的 LLM 配置,请在管理后台配置")
        return
    client = LLMClient(
        provider=llm_cfg.provider,
        api_key=llm_cfg.api_key,
        base_url=llm_cfg.base_url,
        model=llm_cfg.model_name,
    )

    # ── 1. Crawl ─────────────────────────────────────────────────────────────
    sources_result = await db.execute(select(NewsSource).where(NewsSource.is_active == True))  # noqa: E712
    sources = sources_result.scalars().all()
    raw_added = 0
    for src in sources:
        items = await fetch_rss(src.url)
        for item in items:
            exists = await db.execute(select(RawNews.id).where(RawNews.url == item["url"]))
            if exists.scalar_one_or_none():
                continue
            db.add(RawNews(
                source_id=src.id,
                title=item["title"],
                url=item["url"],
                raw_content=item["content"],
                published_at=item["published_at"],
            ))
            raw_added += 1
    await db.commit()
    await _log(db, "INFO", "crawl_done", f"抓取完成,新增 {raw_added} 条原始新闻")

    # ── 2. AI processing ─────────────────────────────────────────────────────
    # The relationship is loaded eagerly: touching an unloaded relationship on
    # an AsyncSession triggers implicit IO and fails outside a greenlet.
    pending_result = await db.execute(
        select(RawNews)
        .join(RawNews.source)
        .options(selectinload(RawNews.source))
        .where(RawNews.status == "pending")
        .limit(120)
    )
    pending = pending_result.scalars().all()
    processed_count = 0
    skipped_count = 0
    failed_count = 0
    for raw in pending:
        source = raw.source
        language = source.language if source else "zh"
        analysis = await _analyze_article(client, raw.title, raw.raw_content or "", language)
        if not isinstance(analysis, dict):
            # Request or parse failure: keep it apart from "not medical".
            raw.status = "error"
            failed_count += 1
        elif not analysis.get("is_medical_related"):
            raw.status = "skipped"
            skipped_count += 1
        else:
            category = analysis.get("category")
            if not (isinstance(category, str) and category in _ALLOWED_CATEGORIES):
                category = _DEFAULT_CATEGORY
            db.add(ProcessedNews(
                raw_news_id=raw.id,
                # ``or`` also covers explicit JSON nulls, which ``.get`` defaults miss.
                title_zh=str(analysis.get("title_zh") or raw.title)[:500],
                summary=str(analysis.get("summary") or ""),
                opinion=analysis.get("opinion"),
                keywords=_coerce_keywords(analysis.get("keywords")),
                importance_score=_coerce_score(analysis.get("importance_score")),
                importance_reason=analysis.get("importance_reason"),
                category=category,
                source_name=source.name if source else "",
                source_url=raw.url,
                published_at=raw.published_at,
            ))
            raw.status = "processed"
            processed_count += 1
        # Commit per article so a later failure cannot discard paid LLM work.
        await db.commit()
    await _log(db, "INFO", "process_done", f"AI 处理完成:{processed_count} 条入库,{skipped_count} 条跳过")
    if failed_count:
        await _log(db, "WARNING", "process_error", f"{failed_count} 条新闻 LLM 分析失败,已标记为 error")

    # ── 3. Elect TOP 10 ──────────────────────────────────────────────────────
    # ``processed_at`` defaults to ``datetime.utcnow``, so the election date must
    # be the UTC date too; the local ``date.today()`` can be a different day.
    featured = await _select_top_10(db, datetime.utcnow().date())
    await _log(db, "INFO", "pipeline_done", f"流水线完成,精选 {featured} 条入今日 TOP 10")

View File

182
backend/app/api/admin.py Normal file
View File

@@ -0,0 +1,182 @@
from fastapi import APIRouter, Depends, HTTPException, Header
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from pydantic import BaseModel
from typing import Optional
import asyncio
from ..database import get_db
from ..models.news import LLMConfig, NewsSource, SystemLog, RawNews, ProcessedNews
from ..config import settings
# Router collecting the admin endpoints defined in this module.
router = APIRouter()
def verify_admin(authorization: str = Header(...)):
    """Reject the request unless the ``Authorization`` header carries the admin token.

    An optional ``Bearer `` prefix and surrounding whitespace are stripped
    before the comparison.

    Raises:
        HTTPException: 401 when the token does not match ``settings.admin_token``.
    """
    import hmac

    token = authorization.removeprefix("Bearer ").strip()
    # Constant-time comparison so response timing does not leak how much of the
    # token matched; bytes are used because compare_digest rejects non-ASCII str.
    supplied = token.encode("utf-8")
    expected = settings.admin_token.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid admin token")
# ── LLM Config ────────────────────────────────────────────────────────────────
class LLMConfigIn(BaseModel):
    """Request body describing an LLM configuration to save or test."""

    name: str
    provider: str
    api_key: str
    base_url: str
    model_name: str
@router.get("/llm-config", dependencies=[Depends(verify_admin)])
async def get_llm_config(db: AsyncSession = Depends(get_db)):
    """Return the active LLM configuration with its API key masked, or ``None``."""
    query = select(LLMConfig).where(LLMConfig.is_active == True).limit(1)  # noqa: E712
    active = (await db.execute(query)).scalar_one_or_none()
    if active is None:
        return None

    # Only the last four characters of the key are ever exposed.
    if len(active.api_key) > 4:
        masked_key = "***" + active.api_key[-4:]
    else:
        masked_key = "****"

    return {
        "id": active.id,
        "name": active.name,
        "provider": active.provider,
        "api_key": masked_key,
        "base_url": active.base_url,
        "model_name": active.model_name,
    }
@router.post("/llm-config", dependencies=[Depends(verify_admin)])
async def save_llm_config(body: LLMConfigIn, db: AsyncSession = Depends(get_db)):
    """Deactivate every stored configuration and insert *body* as the active one."""
    deactivate_all = LLMConfig.__table__.update().values(is_active=False)
    await db.execute(deactivate_all)

    new_cfg = LLMConfig(is_active=True, **body.model_dump())
    db.add(new_cfg)
    await db.commit()
    return {"ok": True, "id": new_cfg.id}
@router.post("/llm-config/test", dependencies=[Depends(verify_admin)])
async def test_llm_config(body: LLMConfigIn):
    """Send a fixed probe prompt using the submitted configuration.

    Returns ``{"ok": True, "reply": ...}`` on success and
    ``{"ok": False, "error": ...}`` when the call raises; nothing is saved.
    """
    from ..ai.llm_client import LLMClient

    probe = LLMClient(
        provider=body.provider,
        api_key=body.api_key,
        base_url=body.base_url,
        model=body.model_name,
    )
    try:
        reply = await probe.complete(
            system_prompt="你是一个助手。",
            user_prompt="请回复'连接正常',不要说其他内容。",
        )
    except Exception as e:
        return {"ok": False, "error": str(e)}
    return {"ok": True, "reply": reply}
# ── News Sources ──────────────────────────────────────────────────────────────
class SourceIn(BaseModel):
    """Request body for registering a news source."""

    name: str
    url: str
    source_type: str = "rss"
    language: str = "zh"
    category: Optional[str] = None
@router.get("/sources", dependencies=[Depends(verify_admin)])
async def get_sources(db: AsyncSession = Depends(get_db)):
    """List every news source ordered by id."""
    rows = await db.execute(select(NewsSource).order_by(NewsSource.id))
    listing = []
    for source in rows.scalars().all():
        listing.append({
            "id": source.id,
            "name": source.name,
            "url": source.url,
            "source_type": source.source_type,
            "language": source.language,
            "category": source.category,
            "is_active": source.is_active,
        })
    return listing
@router.post("/sources", dependencies=[Depends(verify_admin)])
async def add_source(body: SourceIn, db: AsyncSession = Depends(get_db)):
    """Create a news source from *body* and return its id."""
    fields = body.model_dump()
    new_source = NewsSource(**fields)
    db.add(new_source)
    await db.commit()
    return {"ok": True, "id": new_source.id}
@router.put("/sources/{source_id}", dependencies=[Depends(verify_admin)])
async def toggle_source(source_id: int, body: dict, db: AsyncSession = Depends(get_db)):
    """Enable or disable a news source.

    Only the ``is_active`` key of *body* is honoured; other keys are ignored.

    Raises:
        HTTPException: 404 when the source does not exist; 422 when
            ``is_active`` is present but not a JSON boolean.
    """
    result = await db.execute(select(NewsSource).where(NewsSource.id == source_id))
    src = result.scalar_one_or_none()
    if not src:
        raise HTTPException(status_code=404)
    if "is_active" in body:
        flag = body["is_active"]
        # The body is an untyped dict, so reject values such as "false" that
        # would otherwise be stored as a truthy non-boolean.
        if not isinstance(flag, bool):
            raise HTTPException(status_code=422, detail="is_active must be a boolean")
        src.is_active = flag
    await db.commit()
    return {"ok": True}
@router.delete("/sources/{source_id}", dependencies=[Depends(verify_admin)])
async def delete_source(source_id: int, db: AsyncSession = Depends(get_db)):
    """Delete a news source; succeeds silently when the id is unknown."""
    lookup = select(NewsSource).where(NewsSource.id == source_id)
    found = (await db.execute(lookup)).scalar_one_or_none()
    if not found:
        return {"ok": True}
    await db.delete(found)
    await db.commit()
    return {"ok": True}
# ── Trigger & Stats ───────────────────────────────────────────────────────────
# True while a manually triggered pipeline run is in flight.
_pipeline_running = False
# Strong references to in-flight background tasks. The event loop only keeps
# weak references, so an otherwise unreferenced task may be garbage-collected
# before it finishes.
_background_tasks: set = set()


@router.post("/crawl/trigger", dependencies=[Depends(verify_admin)])
async def trigger_crawl():
    """Start the pipeline in the background unless a manual run is in flight.

    Returns immediately; ``ok`` is ``False`` when a run was already started
    through this endpoint and has not finished yet.
    """
    global _pipeline_running
    if _pipeline_running:
        return {"ok": False, "message": "Pipeline already running"}
    _pipeline_running = True
    task = asyncio.create_task(_run_pipeline())
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return {"ok": True, "message": "Pipeline started"}
async def _run_pipeline():
    """Run the pipeline once and always clear the running flag afterwards."""
    from ..scheduler import trigger_now

    global _pipeline_running
    try:
        await trigger_now()
    finally:
        # Reset even on failure so later manual triggers are not blocked.
        _pipeline_running = False
@router.get("/stats", dependencies=[Depends(verify_admin)])
async def get_stats(db: AsyncSession = Depends(get_db)):
    """Return today's crawl/processing counters and the manual-run flag.

    "Today" is the current UTC date: ``crawled_at`` and ``processed_at``
    default to ``datetime.utcnow``, so comparing them with the server-local
    ``date.today()`` would count the wrong day whenever the two dates differ.
    """
    from datetime import datetime

    today = datetime.utcnow().date()
    raw_today = (await db.execute(
        select(func.count(RawNews.id)).where(func.date(RawNews.crawled_at) == today)
    )).scalar_one()
    processed_today = (await db.execute(
        select(func.count(ProcessedNews.id)).where(func.date(ProcessedNews.processed_at) == today)
    )).scalar_one()
    featured_today = (await db.execute(
        select(func.count(ProcessedNews.id))
        .where(func.date(ProcessedNews.processed_at) == today)
        .where(ProcessedNews.is_featured == True)  # noqa: E712
    )).scalar_one()
    return {
        "raw_today": raw_today,
        "processed_today": processed_today,
        "featured_today": featured_today,
        "pipeline_running": _pipeline_running,
    }
# Upper bound on the number of log rows returned by one request.
_MAX_LOG_ROWS = 1000


@router.get("/logs", dependencies=[Depends(verify_admin)])
async def get_logs(limit: int = 100, db: AsyncSession = Depends(get_db)):
    """Return the most recent system log entries, newest first.

    ``limit`` is clamped to ``0..1000``: a negative value would otherwise be
    passed straight into the query's LIMIT clause, and an unbounded value
    could pull the whole table.
    """
    limit = max(0, min(limit, _MAX_LOG_ROWS))
    result = await db.execute(
        select(SystemLog).order_by(SystemLog.created_at.desc()).limit(limit)
    )
    logs = result.scalars().all()
    return [
        {
            "id": entry.id,
            "level": entry.level,
            "event_type": entry.event_type,
            "message": entry.message,
            "created_at": entry.created_at.isoformat(),
        }
        for entry in logs
    ]

96
backend/app/api/news.py Normal file
View File

@@ -0,0 +1,96 @@
from datetime import date, datetime
from typing import Optional
from fastapi import APIRouter, Depends, Query
from sqlalchemy import select, func, distinct
from sqlalchemy.ext.asyncio import AsyncSession
from ..database import get_db
from ..models.news import ProcessedNews, RawNews
# Router collecting the public news endpoints defined in this module.
router = APIRouter()
def _serialize(n: ProcessedNews) -> dict:
    """Convert a ``ProcessedNews`` row into a JSON-serialisable dict.

    ``source_name`` / ``source_url`` come from the row itself; the ``raw_news``
    relationship is consulted only when one of them is empty. Touching the
    relationship unconditionally would force a lazy load on every row, which
    fails under ``AsyncSession`` unless the relationship was eager-loaded.
    """
    source_name = n.source_name
    source_url = n.source_url
    if not source_name or not source_url:
        raw = n.raw_news
        if not source_name:
            source_name = raw.source.name if raw and raw.source else ""
        if not source_url:
            source_url = raw.url if raw else ""
    return {
        "id": n.id,
        "title_zh": n.title_zh,
        "summary": n.summary,
        "opinion": n.opinion,
        "keywords": n.keywords or [],
        "importance_score": n.importance_score,
        "importance_reason": n.importance_reason,
        "category": n.category,
        "is_featured": n.is_featured,
        "featured_rank": n.featured_rank,
        "source_name": source_name,
        "source_url": source_url,
        "published_at": n.published_at.isoformat() if n.published_at else None,
        "processed_at": n.processed_at.isoformat() if n.processed_at else None,
    }
@router.get("/featured")
async def get_featured(
    news_date: Optional[str] = Query(default=None, alias="date"),
    db: AsyncSession = Depends(get_db),
):
    """Return the featured items of one day ordered by ``featured_rank``.

    Args:
        news_date: ISO date (``YYYY-MM-DD``) passed as ``?date=``; defaults to
            the current UTC date because ``processed_at`` is stored via
            ``datetime.utcnow``.

    Raises:
        HTTPException: 422 when ``date`` is not a valid ISO date.
    """
    from fastapi import HTTPException
    from sqlalchemy.orm import selectinload

    if news_date:
        try:
            target = date.fromisoformat(news_date)
        except ValueError as exc:
            raise HTTPException(status_code=422, detail="date must be in YYYY-MM-DD format") from exc
    else:
        target = datetime.utcnow().date()

    stmt = (
        select(ProcessedNews)
        .join(ProcessedNews.raw_news)
        # Eager-load what the serializer may read; lazy loading is not
        # available on an AsyncSession.
        .options(selectinload(ProcessedNews.raw_news).selectinload(RawNews.source))
        .where(ProcessedNews.is_featured == True)  # noqa: E712
        .where(func.date(ProcessedNews.processed_at) == target)
        .order_by(ProcessedNews.featured_rank)
    )
    result = await db.execute(stmt)
    items = result.scalars().all()
    return {"date": str(target), "items": [_serialize(n) for n in items]}
@router.get("")
async def get_news(
    news_date: Optional[str] = Query(default=None, alias="date"),
    category: Optional[str] = Query(default=None),
    page: int = Query(default=1, ge=1),
    page_size: int = Query(default=20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of a day's news ordered by descending importance.

    Args:
        news_date: ISO date (``YYYY-MM-DD``) passed as ``?date=``; defaults to
            the current UTC date because ``processed_at`` is stored via
            ``datetime.utcnow``.
        category: Optional exact category filter.
        page: 1-based page number.
        page_size: Items per page (1–100).

    Raises:
        HTTPException: 422 when ``date`` is not a valid ISO date.
    """
    from fastapi import HTTPException
    from sqlalchemy.orm import selectinload

    if news_date:
        try:
            target = date.fromisoformat(news_date)
        except ValueError as exc:
            raise HTTPException(status_code=422, detail="date must be in YYYY-MM-DD format") from exc
    else:
        target = datetime.utcnow().date()

    stmt = (
        select(ProcessedNews)
        .join(ProcessedNews.raw_news)
        .where(func.date(ProcessedNews.processed_at) == target)
    )
    if category:
        stmt = stmt.where(ProcessedNews.category == category)

    # Count before loader options and paging are attached.
    count_stmt = select(func.count()).select_from(stmt.subquery())
    total = (await db.execute(count_stmt)).scalar_one()

    page_stmt = (
        stmt
        # Eager-load what the serializer may read; lazy loading is not
        # available on an AsyncSession.
        .options(selectinload(ProcessedNews.raw_news).selectinload(RawNews.source))
        # The id tie-breaker keeps pages stable when scores are equal.
        .order_by(ProcessedNews.importance_score.desc(), ProcessedNews.id.desc())
        .offset((page - 1) * page_size)
        .limit(page_size)
    )
    result = await db.execute(page_stmt)
    items = result.scalars().all()
    return {"date": str(target), "total": total, "page": page, "items": [_serialize(n) for n in items]}
@router.get("/dates")
async def get_dates(db: AsyncSession = Depends(get_db)):
    """Return up to 30 most recent processing dates with their item counts."""
    day = func.date(ProcessedNews.processed_at)
    per_day = (
        select(day.label("d"), func.count(ProcessedNews.id).label("cnt"))
        .group_by("d")
        .order_by(day.desc())
        .limit(30)
    )
    rows = await db.execute(per_day)
    return [{"date": str(r.d), "count": r.cnt} for r in rows]
@router.get("/{news_id}")
async def get_news_detail(news_id: int, db: AsyncSession = Depends(get_db)):
    """Return a single processed news item.

    Raises:
        HTTPException: 404 when no item with *news_id* exists.
    """
    from fastapi import HTTPException
    from sqlalchemy.orm import selectinload

    stmt = (
        select(ProcessedNews)
        .join(ProcessedNews.raw_news)
        # Eager-load what the serializer may read; lazy loading is not
        # available on an AsyncSession.
        .options(selectinload(ProcessedNews.raw_news).selectinload(RawNews.source))
        .where(ProcessedNews.id == news_id)
    )
    result = await db.execute(stmt)
    news = result.scalar_one_or_none()
    if not news:
        raise HTTPException(status_code=404, detail="Not found")
    return _serialize(news)

17
backend/app/config.py Normal file
View File

@@ -0,0 +1,17 @@
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings with development defaults; ``.env`` is the env file."""

    # NOTE(review): the default URL embeds development credentials — make sure
    # deployments override it.
    database_url: str = "postgresql+asyncpg://pharma:pharma123@localhost/pharma_news"
    # NOTE(review): placeholder token guarding the admin API — must be
    # overridden outside local development.
    admin_token: str = "change-me-admin-token"
    # Values used to seed the first LLM configuration; seeding is skipped when
    # the API key is empty.
    initial_llm_provider: str = "deepseek"
    initial_llm_api_key: str = ""
    initial_llm_base_url: str = "https://api.deepseek.com"
    initial_llm_model: str = "deepseek-chat"

    class Config:
        env_file = ".env"


# Settings instance created at import time and shared by the other modules.
settings = Settings()

View File

View File

@@ -0,0 +1,73 @@
import logging
from datetime import datetime
from email.utils import parsedate_to_datetime
from typing import Optional
import feedparser
import httpx
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Default request headers sent with every feed download.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; PharmaIntelBot/1.0)"
}
def _parse_date(raw: str) -> Optional[datetime]:
    """Parse a feed timestamp into a naive UTC ``datetime``.

    RFC 2822 dates are tried first, then ISO 8601 (a trailing ``Z`` is
    accepted). Timezone-aware results are converted to UTC before the tzinfo
    is dropped, so ``08:00 +0800`` becomes ``00:00`` rather than ``08:00``;
    values without an offset are returned unchanged.

    Returns:
        The parsed datetime, or ``None`` for empty or unparseable input.
    """
    from datetime import timezone

    if not raw:
        return None

    parsed = None
    try:
        parsed = parsedate_to_datetime(raw)
    except (TypeError, ValueError, IndexError):
        # Not an RFC 2822 date; fall through to ISO 8601.
        try:
            parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except (TypeError, ValueError):
            return None

    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc)
    return parsed.replace(tzinfo=None)
def _entry_content(entry) -> str:
    """Return an entry's summary, else its first content block, stripped.

    Missing keys, ``None`` values and an empty ``content`` list all yield an
    empty string instead of raising.
    """
    summary = entry.get("summary") or ""
    if summary:
        return summary.strip()
    blocks = entry.get("content") or []
    first = blocks[0] if blocks else {}
    return (first.get("value") or "").strip()


async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
    """Download and parse one feed, returning at most *max_items* entries.

    Args:
        url: Feed URL.
        max_items: Maximum number of entries to return.

    Returns:
        A list of dicts with ``title``, ``url``, ``content`` (truncated to
        3000 characters) and ``published_at``. Entries without a title or link
        are skipped. Download failures are logged and yield an empty list.
    """
    try:
        async with httpx.AsyncClient(headers=HEADERS, timeout=30, follow_redirects=True) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            # Hand feedparser the raw bytes: it performs its own encoding
            # detection from the XML declaration and Content-Type header,
            # which decoding to text beforehand would bypass.
            payload = resp.content
            content_type = resp.headers.get("content-type", "")
    except Exception as e:
        # Best-effort boundary: one broken source must not stop the crawl.
        logger.warning(f"RSS fetch failed {url}: {e}")
        return []

    response_headers = {"content-type": content_type} if content_type else None
    feed = feedparser.parse(payload, response_headers=response_headers)

    items = []
    for entry in feed.entries[:max(0, max_items)]:
        title = (entry.get("title") or "").strip()
        link = (entry.get("link") or "").strip()
        if not title or not link:
            continue
        published_raw = entry.get("published") or entry.get("updated") or ""
        items.append({
            "title": title,
            "url": link,
            "content": _entry_content(entry)[:3000],
            "published_at": _parse_date(published_raw),
        })
    logger.info(f"RSS {url}: got {len(items)} items")
    return items
# Default news sources (they can be added or removed from the admin page).
# Each entry carries the display name, feed URL, language code and category.
# NOTE(review): nothing visible here consumes this list — confirm where the
# defaults are seeded.
DEFAULT_SOURCES = [
    # Chinese-language sources
    {"name": "国家药监局", "url": "https://www.nmpa.gov.cn/rss/yaopinxinxi.xml", "language": "zh", "category": "药品监管"},
    {"name": "丁香园", "url": "https://www.dxy.cn/bbs/feed.xml", "language": "zh", "category": "临床研究"},
    {"name": "医学界", "url": "https://www.yxj.org.cn/rss.xml", "language": "zh", "category": "行业动态"},
    # English-language sources
    {"name": "STAT News", "url": "https://www.statnews.com/feed/", "language": "en", "category": "临床研究"},
    {"name": "FiercePharma", "url": "https://www.fiercepharma.com/rss/xml", "language": "en", "category": "行业动态"},
    {"name": "FDA News", "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/fda-news-feed/rss.xml", "language": "en", "category": "药品监管"},
]

21
backend/app/database.py Normal file
View File

@@ -0,0 +1,21 @@
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import DeclarativeBase
from .config import settings
# Async engine bound to the configured database URL; SQL echo is disabled.
engine = create_async_engine(settings.database_url, echo=False)
# Session factory. expire_on_commit=False keeps loaded attributes readable
# after a commit without another round trip to the database.
AsyncSessionLocal = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
class Base(DeclarativeBase):
    """Declarative base class shared by all ORM models."""

    pass
async def get_db():
    """Yield a database session that is closed once the consumer is done."""
    session_cm = AsyncSessionLocal()
    async with session_cm as db_session:
        yield db_session
async def create_tables():
    """Create every table registered on ``Base.metadata`` if it is missing."""
    # Importing the models module registers the tables on the metadata.
    from .models import news  # noqa: ensure models are registered

    async with engine.begin() as connection:
        await connection.run_sync(Base.metadata.create_all)

57
backend/app/main.py Normal file
View File

@@ -0,0 +1,57 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .database import create_tables, AsyncSessionLocal
from .scheduler import start_scheduler, shutdown_scheduler
from .api import news, admin
from .config import settings
from .models.news import LLMConfig
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: prepare the database, then run the scheduler.

    On startup the tables are created, the initial LLM configuration is seeded
    and the scheduler is started. The scheduler is shut down in a ``finally``
    block so it is also stopped when the application exits through an error
    or cancellation instead of a clean shutdown.
    """
    await create_tables()
    await seed_initial_llm_config()
    start_scheduler()
    try:
        yield
    finally:
        shutdown_scheduler()
async def seed_initial_llm_config():
    """Insert the default LLM config on first run.

    Nothing is written when any configuration row already exists or when no
    initial API key is configured.
    """
    from sqlalchemy import select

    async with AsyncSessionLocal() as session:
        existing = (await session.execute(select(LLMConfig).limit(1))).scalar_one_or_none()
        if existing or not settings.initial_llm_api_key:
            return
        session.add(
            LLMConfig(
                name="默认配置",
                provider=settings.initial_llm_provider,
                api_key=settings.initial_llm_api_key,
                base_url=settings.initial_llm_base_url,
                model_name=settings.initial_llm_model,
                is_active=True,
            )
        )
        await session.commit()
# FastAPI application; `lifespan` supplies the startup and shutdown hooks.
app = FastAPI(title="医药情报 API", version="1.0.0", lifespan=lifespan)
# NOTE(review): CORS accepts any origin, method and header — confirm this is
# intended, given that the admin API is mounted on the same application.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# News endpoints are mounted under /api/news, admin endpoints under /api/admin.
app.include_router(news.router, prefix="/api/news", tags=["news"])
app.include_router(admin.router, prefix="/api/admin", tags=["admin"])
@app.get("/api/health")
async def health():
    """Liveness probe returning a constant payload."""
    payload = {"status": "ok"}
    return payload

View File

View File

@@ -0,0 +1,81 @@
from datetime import datetime
from typing import Optional, List
from sqlalchemy import String, Text, Integer, Float, Boolean, DateTime, ForeignKey, ARRAY
from sqlalchemy.orm import Mapped, mapped_column, relationship
from ..database import Base
class NewsSource(Base):
    """A configured news feed that the crawler reads from."""

    __tablename__ = "news_sources"
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String(100))
    url: Mapped[str] = mapped_column(String(500))
    source_type: Mapped[str] = mapped_column(String(20), default="rss") # rss | scrape
    language: Mapped[str] = mapped_column(String(5), default="zh") # zh | en
    category: Mapped[Optional[str]] = mapped_column(String(50))
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
    # Naive timestamp taken from datetime.utcnow when the row is created.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    # Articles crawled from this source.
    raw_news: Mapped[List["RawNews"]] = relationship(back_populates="source")
class RawNews(Base):
    """An article as crawled from a source, before any LLM processing."""

    __tablename__ = "raw_news"
    id: Mapped[int] = mapped_column(primary_key=True)
    # Nullable: an article can exist without a source row.
    source_id: Mapped[Optional[int]] = mapped_column(ForeignKey("news_sources.id"))
    title: Mapped[str] = mapped_column(String(500))
    # Unique per article.
    url: Mapped[str] = mapped_column(String(1000), unique=True)
    raw_content: Mapped[Optional[str]] = mapped_column(Text)
    published_at: Mapped[Optional[datetime]] = mapped_column(DateTime)
    # Naive timestamp taken from datetime.utcnow when the row is created.
    crawled_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    status: Mapped[str] = mapped_column(String(20), default="pending") # pending|processed|skipped|error
    source: Mapped[Optional["NewsSource"]] = relationship(back_populates="raw_news")
    # One-to-one link to the analysis result, if one exists.
    processed: Mapped[Optional["ProcessedNews"]] = relationship(back_populates="raw_news", uselist=False)
class ProcessedNews(Base):
    """LLM analysis of one raw article, plus its featured-selection state."""

    __tablename__ = "processed_news"
    id: Mapped[int] = mapped_column(primary_key=True)
    raw_news_id: Mapped[int] = mapped_column(ForeignKey("raw_news.id"))
    title_zh: Mapped[str] = mapped_column(String(500))
    summary: Mapped[str] = mapped_column(Text)
    opinion: Mapped[Optional[str]] = mapped_column(Text)
    # NOTE(review): generic ARRAY column — presumably requires PostgreSQL; confirm.
    keywords: Mapped[Optional[List[str]]] = mapped_column(ARRAY(String))
    importance_score: Mapped[float] = mapped_column(Float, default=5.0)
    importance_reason: Mapped[Optional[str]] = mapped_column(Text)
    category: Mapped[str] = mapped_column(String(50), default="行业动态")
    # Featured flag and rank; the rank is nullable.
    is_featured: Mapped[bool] = mapped_column(Boolean, default=False)
    featured_rank: Mapped[Optional[int]] = mapped_column(Integer)
    # Source name and URL stored on the row itself.
    source_name: Mapped[Optional[str]] = mapped_column(String(200))
    source_url: Mapped[Optional[str]] = mapped_column(String(1000))
    published_at: Mapped[Optional[datetime]] = mapped_column(DateTime)
    # Naive timestamp taken from datetime.utcnow when the row is created.
    processed_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    raw_news: Mapped["RawNews"] = relationship(back_populates="processed")
class LLMConfig(Base):
    """Stored connection settings for an LLM provider."""

    __tablename__ = "llm_config"
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String(100))
    provider: Mapped[str] = mapped_column(String(50)) # openai | anthropic | qwen | deepseek | custom
    # NOTE(review): the key is stored in a plain string column — confirm this is acceptable.
    api_key: Mapped[str] = mapped_column(String(500))
    base_url: Mapped[str] = mapped_column(String(500))
    model_name: Mapped[str] = mapped_column(String(200))
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
    # Naive timestamp taken from datetime.utcnow when the row is created.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
class SystemLog(Base):
    """A log entry persisted in the database."""

    __tablename__ = "system_logs"
    id: Mapped[int] = mapped_column(primary_key=True)
    event_type: Mapped[str] = mapped_column(String(50))
    message: Mapped[str] = mapped_column(Text)
    level: Mapped[str] = mapped_column(String(20), default="INFO")
    # Naive timestamp taken from datetime.utcnow when the row is created.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

31
backend/app/scheduler.py Normal file
View File

@@ -0,0 +1,31 @@
import logging
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Process-wide asyncio scheduler configured with the Asia/Shanghai timezone.
scheduler = AsyncIOScheduler(timezone="Asia/Shanghai")
async def daily_pipeline_job():
    """Run the daily pipeline in its own session, logging instead of raising."""
    from .ai.processor import run_daily_pipeline
    from .database import AsyncSessionLocal

    async with AsyncSessionLocal() as session:
        try:
            await run_daily_pipeline(session)
        except Exception as e:
            # Failures are logged with the traceback and never propagate.
            logger.error(f"Daily pipeline failed: {e}", exc_info=True)
def start_scheduler():
    """Register the daily pipeline job at 06:00 Asia/Shanghai and start the scheduler."""
    # A CronTrigger built by hand does not inherit the scheduler's timezone; it
    # falls back to the machine's local zone. Pass the scheduler timezone
    # explicitly so the job really fires at 06:00 Asia/Shanghai.
    trigger = CronTrigger(hour=6, minute=0, timezone=scheduler.timezone)
    scheduler.add_job(daily_pipeline_job, trigger, id="daily_pipeline", replace_existing=True)
    scheduler.start()
    logger.info("Scheduler started — daily pipeline runs at 06:00 Asia/Shanghai")
def shutdown_scheduler():
    """Stop the scheduler without waiting for running jobs to finish."""
    wait_for_jobs = False
    scheduler.shutdown(wait=wait_for_jobs)
async def trigger_now():
    """Run the daily pipeline immediately (called from the admin API)."""
    job = daily_pipeline_job()
    await job