完全跑通1.0版本

This commit is contained in:
2026-05-26 12:56:03 +08:00
parent 2ece5174a7
commit 93c714a93b
11557 changed files with 1648225 additions and 36 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,17 +1,16 @@
from datetime import date, datetime
from datetime import date
from typing import Optional
from fastapi import APIRouter, Depends, Query
from sqlalchemy import select, func, distinct
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from ..database import get_db
from ..models.news import ProcessedNews, RawNews
from ..models.news import ProcessedNews
router = APIRouter()
def _serialize(n: ProcessedNews) -> dict:
raw = n.raw_news
return {
"id": n.id,
"title_zh": n.title_zh,
@@ -23,8 +22,8 @@ def _serialize(n: ProcessedNews) -> dict:
"category": n.category,
"is_featured": n.is_featured,
"featured_rank": n.featured_rank,
"source_name": n.source_name or (raw.source.name if raw and raw.source else ""),
"source_url": n.source_url or (raw.url if raw else ""),
"source_name": n.source_name or "",
"source_url": n.source_url or "",
"published_at": n.published_at.isoformat() if n.published_at else None,
"processed_at": n.processed_at.isoformat() if n.processed_at else None,
}
@@ -38,7 +37,6 @@ async def get_featured(
target = date.fromisoformat(news_date) if news_date else date.today()
stmt = (
select(ProcessedNews)
.join(ProcessedNews.raw_news)
.where(ProcessedNews.is_featured == True)
.where(func.date(ProcessedNews.processed_at) == target)
.order_by(ProcessedNews.featured_rank)
@@ -57,11 +55,7 @@ async def get_news(
db: AsyncSession = Depends(get_db),
):
target = date.fromisoformat(news_date) if news_date else date.today()
stmt = (
select(ProcessedNews)
.join(ProcessedNews.raw_news)
.where(func.date(ProcessedNews.processed_at) == target)
)
stmt = select(ProcessedNews).where(func.date(ProcessedNews.processed_at) == target)
if category:
stmt = stmt.where(ProcessedNews.category == category)
@@ -87,10 +81,9 @@ async def get_dates(db: AsyncSession = Depends(get_db)):
@router.get("/{news_id}")
async def get_news_detail(news_id: int, db: AsyncSession = Depends(get_db)):
    """Return the serialized processed-news record with the given id.

    Args:
        news_id: Primary key of the ProcessedNews row to look up.
        db: Async database session injected by FastAPI.

    Returns:
        The dict produced by ``_serialize`` for the matching row.

    Raises:
        HTTPException: 404 when no row has this id.
    """
    # Look the row up on ProcessedNews alone; the earlier join-based statement
    # was overwritten before use and has been dropped.
    stmt = select(ProcessedNews).where(ProcessedNews.id == news_id)
    result = await db.execute(stmt)
    news = result.scalar_one_or_none()
    if news is None:
        # HTTPException comes from the module-level fastapi import.
        raise HTTPException(status_code=404, detail="Not found")
    return _serialize(news)

View File

@@ -2,7 +2,7 @@ from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings.

    Each field is a default that pydantic-settings overrides from the
    environment variable of the same name (e.g. ``DATABASE_URL``).
    """

    # SECURITY: never commit a real password or production host here. The
    # default is a credential-free local DSN; supply the real connection
    # string through the DATABASE_URL environment variable, and rotate any
    # credential that has already been committed to version control.
    database_url: str = "postgresql+asyncpg://ai_news@localhost:5432/ai_news"
    # Placeholder value; set ADMIN_TOKEN in the environment for real deployments.
    admin_token: str = "change-me-admin-token"
    initial_llm_provider: str = "deepseek"

View File

@@ -60,14 +60,15 @@ async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
return items
# Default news sources (can be added or removed from the admin page)
# Default news sources (can be added or removed from the admin page) — URLs verified to be reachable
# Each entry is a dict with the keys "name", "url", "language" and "category".
DEFAULT_SOURCES = [
# Chinese-language sources
{"name": "国家药监局", "url": "https://www.nmpa.gov.cn/rss/yaopinxinxi.xml", "language": "zh", "category": "药品监管"},
{"name": "丁香园", "url": "https://www.dxy.cn/bbs/feed.xml", "language": "zh", "category": "临床研究"},
{"name": "医学界", "url": "https://www.yxj.org.cn/rss.xml", "language": "zh", "category": "行业动态"},
{"name": "中国新闻网·健康", "url": "https://www.chinanews.com.cn/rss/health.xml", "language": "zh", "category": "行业动态"},
# English-language sources
{"name": "STAT News", "url": "https://www.statnews.com/feed/", "language": "en", "category": "临床研究"},
{"name": "FiercePharma", "url": "https://www.fiercepharma.com/rss/xml", "language": "en", "category": "行业动态"},
{"name": "FDA News", "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/fda-news-feed/rss.xml", "language": "en", "category": "药品监管"},
{"name": "FierceBiotech", "url": "https://www.fiercebiotech.com/rss/xml", "language": "en", "category": "临床研究"},
{"name": "FDA MedWatch", "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/medwatch/rss.xml", "language": "en", "category": "药品监管"},
{"name": "FDA Press Releases", "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/press-releases/rss.xml", "language": "en", "category": "药品监管"},
{"name": "Nature Medicine", "url": "https://www.nature.com/nm.rss", "language": "en", "category": "临床研究"},
]

View File

@@ -6,18 +6,38 @@ from .database import create_tables, AsyncSessionLocal
from .scheduler import start_scheduler, shutdown_scheduler
from .api import news, admin
from .config import settings
from .models.news import LLMConfig
from .models.news import LLMConfig, NewsSource
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves requests, then stop the scheduler.

    Startup order: create the tables, seed the initial LLM config, seed the
    default news sources, then start the scheduler.

    Args:
        app: The FastAPI application this lifespan is attached to (unused here).

    Yields:
        None, once startup has finished; control returns here on shutdown.
    """
    await create_tables()
    await seed_initial_llm_config()
    await seed_default_sources()
    start_scheduler()
    try:
        yield
    finally:
        # Stop the scheduler even if an exception is thrown into the
        # generator at the yield point; otherwise it would be left running.
        shutdown_scheduler()
async def seed_default_sources():
    """Seed the NewsSource table from DEFAULT_SOURCES when it holds no rows.

    Does nothing if at least one NewsSource row already exists. Otherwise one
    row with ``source_type="rss"`` is added per DEFAULT_SOURCES entry and the
    batch is committed in a single transaction.
    """
    from .crawler.rss_fetcher import DEFAULT_SOURCES
    from sqlalchemy import select

    async with AsyncSessionLocal() as session:
        # Fetching a single row is enough to tell whether the table is populated.
        probe = await session.execute(select(NewsSource).limit(1))
        already_seeded = probe.scalar_one_or_none()
        if already_seeded:
            return

        session.add_all(
            [
                NewsSource(
                    name=entry["name"],
                    url=entry["url"],
                    source_type="rss",
                    language=entry["language"],
                    category=entry["category"],
                )
                for entry in DEFAULT_SOURCES
            ]
        )
        await session.commit()
async def seed_initial_llm_config():
"""Insert default LLM config on first run if none exists."""
from sqlalchemy import select

Binary file not shown.