完全跑通1.0版本

2026-05-26 12:56:03 +08:00
parent 2ece5174a7
commit 93c714a93b
11557 changed files with 1648225 additions and 36 deletions
--- a/backend/app/__pycache__/__init__.cpython-314.pyc
+++ b/backend/app/__pycache__/__init__.cpython-314.pyc
--- a/backend/app/__pycache__/config.cpython-314.pyc
+++ b/backend/app/__pycache__/config.cpython-314.pyc
--- a/backend/app/__pycache__/database.cpython-314.pyc
+++ b/backend/app/__pycache__/database.cpython-314.pyc
--- a/backend/app/__pycache__/main.cpython-314.pyc
+++ b/backend/app/__pycache__/main.cpython-314.pyc
--- a/backend/app/__pycache__/scheduler.cpython-314.pyc
+++ b/backend/app/__pycache__/scheduler.cpython-314.pyc
--- a/backend/app/ai/__pycache__/__init__.cpython-314.pyc
+++ b/backend/app/ai/__pycache__/__init__.cpython-314.pyc
--- a/backend/app/ai/__pycache__/llm_client.cpython-314.pyc
+++ b/backend/app/ai/__pycache__/llm_client.cpython-314.pyc
--- a/backend/app/ai/__pycache__/processor.cpython-314.pyc
+++ b/backend/app/ai/__pycache__/processor.cpython-314.pyc
--- a/backend/app/api/__pycache__/__init__.cpython-314.pyc
+++ b/backend/app/api/__pycache__/__init__.cpython-314.pyc
--- a/backend/app/api/__pycache__/admin.cpython-314.pyc
+++ b/backend/app/api/__pycache__/admin.cpython-314.pyc
--- a/backend/app/api/__pycache__/news.cpython-314.pyc
+++ b/backend/app/api/__pycache__/news.cpython-314.pyc
--- a/backend/app/api/news.py
+++ b/backend/app/api/news.py
@@ -1,17 +1,16 @@
-from datetime import date, datetime
+from datetime import date
 from typing import Optional
-from fastapi import APIRouter, Depends, Query
-from sqlalchemy import select, func, distinct
+from fastapi import APIRouter, Depends, HTTPException, Query
+from sqlalchemy import select, func
 from sqlalchemy.ext.asyncio import AsyncSession

 from ..database import get_db
-from ..models.news import ProcessedNews, RawNews
+from ..models.news import ProcessedNews

 router = APIRouter()


 def _serialize(n: ProcessedNews) -> dict:
-    raw = n.raw_news
    return {
        "id": n.id,
        "title_zh": n.title_zh,
@@ -23,8 +22,8 @@ def _serialize(n: ProcessedNews) -> dict:
        "category": n.category,
        "is_featured": n.is_featured,
        "featured_rank": n.featured_rank,
-        "source_name": n.source_name or (raw.source.name if raw and raw.source else ""),
-        "source_url": n.source_url or (raw.url if raw else ""),
+        "source_name": n.source_name or "",
+        "source_url": n.source_url or "",
        "published_at": n.published_at.isoformat() if n.published_at else None,
        "processed_at": n.processed_at.isoformat() if n.processed_at else None,
    }
@@ -38,7 +37,6 @@ async def get_featured(
    target = date.fromisoformat(news_date) if news_date else date.today()
    stmt = (
        select(ProcessedNews)
-        .join(ProcessedNews.raw_news)
        .where(ProcessedNews.is_featured == True)
        .where(func.date(ProcessedNews.processed_at) == target)
        .order_by(ProcessedNews.featured_rank)
@@ -57,11 +55,7 @@ async def get_news(
    db: AsyncSession = Depends(get_db),
 ):
    target = date.fromisoformat(news_date) if news_date else date.today()
-    stmt = (
-        select(ProcessedNews)
-        .join(ProcessedNews.raw_news)
-        .where(func.date(ProcessedNews.processed_at) == target)
-    )
+    stmt = select(ProcessedNews).where(func.date(ProcessedNews.processed_at) == target)
    if category:
        stmt = stmt.where(ProcessedNews.category == category)

@@ -87,10 +81,9 @@ async def get_dates(db: AsyncSession = Depends(get_db)):

@router.get("/{news_id}")
 async def get_news_detail(news_id: int, db: AsyncSession = Depends(get_db)):
-    stmt = select(ProcessedNews).join(ProcessedNews.raw_news).where(ProcessedNews.id == news_id)
+    stmt = select(ProcessedNews).where(ProcessedNews.id == news_id)
    result = await db.execute(stmt)
    news = result.scalar_one_or_none()
    if not news:
-        from fastapi import HTTPException
        raise HTTPException(status_code=404, detail="Not found")
    return _serialize(news)
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -2,7 +2,7 @@ from pydantic_settings import BaseSettings


 class Settings(BaseSettings):
-    database_url: str = "postgresql+asyncpg://pharma:pharma123@localhost/pharma_news"
+    database_url: str = "postgresql+asyncpg://ai_news:PrDTEr6tGcyWX6G2@chenwuzhu.cn:5432/ai_news"
    admin_token: str = "change-me-admin-token"

    initial_llm_provider: str = "deepseek"
--- a/backend/app/crawler/__pycache__/__init__.cpython-314.pyc
+++ b/backend/app/crawler/__pycache__/__init__.cpython-314.pyc
--- a/backend/app/crawler/__pycache__/rss_fetcher.cpython-314.pyc
+++ b/backend/app/crawler/__pycache__/rss_fetcher.cpython-314.pyc
--- a/backend/app/crawler/rss_fetcher.py
+++ b/backend/app/crawler/rss_fetcher.py
@@ -60,14 +60,15 @@ async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
    return items


-# 默认新闻源（管理页可增删）
+# 默认新闻源（管理页可增删）— URLs 经过验证可用
 DEFAULT_SOURCES = [
    # 中文
-    {"name": "国家药监局", "url": "https://www.nmpa.gov.cn/rss/yaopinxinxi.xml", "language": "zh", "category": "药品监管"},
-    {"name": "丁香园", "url": "https://www.dxy.cn/bbs/feed.xml", "language": "zh", "category": "临床研究"},
-    {"name": "医学界", "url": "https://www.yxj.org.cn/rss.xml", "language": "zh", "category": "行业动态"},
+    {"name": "中国新闻网·健康", "url": "https://www.chinanews.com.cn/rss/health.xml", "language": "zh", "category": "行业动态"},
    # 英文
    {"name": "STAT News", "url": "https://www.statnews.com/feed/", "language": "en", "category": "临床研究"},
    {"name": "FiercePharma", "url": "https://www.fiercepharma.com/rss/xml", "language": "en", "category": "行业动态"},
-    {"name": "FDA News", "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/fda-news-feed/rss.xml", "language": "en", "category": "药品监管"},
+    {"name": "FierceBiotech", "url": "https://www.fiercebiotech.com/rss/xml", "language": "en", "category": "临床研究"},
+    {"name": "FDA MedWatch", "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/medwatch/rss.xml", "language": "en", "category": "药品监管"},
+    {"name": "FDA Press Releases", "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/press-releases/rss.xml", "language": "en", "category": "药品监管"},
+    {"name": "Nature Medicine", "url": "https://www.nature.com/nm.rss", "language": "en", "category": "临床研究"},
 ]
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -6,18 +6,38 @@ from .database import create_tables, AsyncSessionLocal
 from .scheduler import start_scheduler, shutdown_scheduler
 from .api import news, admin
 from .config import settings
-from .models.news import LLMConfig
+from .models.news import LLMConfig, NewsSource


@asynccontextmanager
 async def lifespan(app: FastAPI):
    await create_tables()
    await seed_initial_llm_config()
+    await seed_default_sources()
    start_scheduler()
    yield
    shutdown_scheduler()


+async def seed_default_sources():
+    """Insert default news sources on first run if the table is empty.
+
+    Opens a session from ``AsyncSessionLocal`` and probes for a single
+    ``NewsSource`` row. If any row is found the function returns without
+    writing anything. Otherwise one ``NewsSource`` is added per entry of
+    ``DEFAULT_SOURCES`` (reading its ``name``, ``url``, ``language`` and
+    ``category`` keys), each with ``source_type="rss"``, and the session is
+    committed once after all rows have been added.
+    """
+    # Both imports are function-local rather than at module top.
+    # NOTE(review): presumably to keep startup import order simple or avoid
+    # a circular import -- confirm before hoisting them.
+    from .crawler.rss_fetcher import DEFAULT_SOURCES
+    from sqlalchemy import select
+    async with AsyncSessionLocal() as db:
+        # LIMIT 1 probe: only the existence of a row matters here.
+        result = await db.execute(select(NewsSource).limit(1))
+        if result.scalar_one_or_none():
+            # At least one source already exists, so the defaults are skipped.
+            return
+        for src in DEFAULT_SOURCES:
+            db.add(NewsSource(
+                name=src["name"],
+                url=src["url"],
+                source_type="rss",
+                language=src["language"],
+                category=src["category"],
+            ))
+        # Single commit covering every default source added above.
+        await db.commit()
+
+
 async def seed_initial_llm_config():
    """Insert default LLM config on first run if none exists."""
    from sqlalchemy import select
--- a/backend/app/models/__pycache__/__init__.cpython-314.pyc
+++ b/backend/app/models/__pycache__/__init__.cpython-314.pyc
--- a/backend/app/models/__pycache__/news.cpython-314.pyc
+++ b/backend/app/models/__pycache__/news.cpython-314.pyc