v1.0定版

2026-05-27 17:14:08 +08:00
parent 1b7210de4f
commit 5b19d9fe69
32 changed files with 2074 additions and 2915 deletions
--- a/backend/app/ai/processor.py
+++ b/backend/app/ai/processor.py
@@ -25,18 +25,20 @@ ANALYSIS_PROMPT = """分析以下新闻，返回严格的 JSON 格式结果，
  "summary": "中文摘要（100-150字，客观陈述核心内容）",
  "opinion": "核心观点或行业影响（50-100字，分析性语言，点明实际意义）",
  "keywords": ["关键词1", "关键词2", "关键词3", "关键词4", "关键词5"],
-  "importance_score": 8.5,
+  "importance_score": 85,
  "importance_reason": "评分理由（30字内）",
  "category": "药品监管"
 }}

 category 只能是以下四个之一：药品监管 / 临床研究 / 行业动态 / 政策法规

-importance_score 评分标准（1-10）：
-9-10：重大监管决定 / 突破性研究 / 影响整个行业的政策
-7-8 ：行业重要动态，有明显商业或学术价值
-5-6 ：常规行业新闻，有一定参考价值
-1-4 ：普通资讯，信息价值有限
+importance_score 评分标准（1-100整数）：
+90-100：重大监管决定 / 突破性研究 / 影响整个行业的政策
+70-89 ：行业重要动态，有明显商业或学术价值
+50-69 ：常规行业新闻，有一定参考价值
+1-49  ：普通资讯，信息价值有限
+
+注意：只有 85 分及以上的新闻才有资格进入每日精选，请严格区分。
 """


@@ -70,7 +72,8 @@ async def _analyze_article(client: LLMClient, title: str, content: str, language


 async def _select_top_10(db: AsyncSession, target: date):
-    """Reset featured flags and elect TOP 10 with category diversity."""
+    """Reset featured flags and elect TOP 10 with category diversity.
+    Only news with importance_score >= 85 is eligible for the featured list (精选), so fewer than 10 may be picked."""
    result = await db.execute(
        select(ProcessedNews)
        .where(func.date(ProcessedNews.processed_at) == target)
@@ -78,25 +81,28 @@ async def _select_top_10(db: AsyncSession, target: date):
    )
    all_news = result.scalars().all()

-    # Reset
+    # Reset all
    for n in all_news:
        n.is_featured = False
        n.featured_rank = None

+    # Only candidates with score >= 85
+    candidates = [n for n in all_news if n.importance_score >= 85]
+
    categories = ["药品监管", "临床研究", "行业动态", "政策法规"]
    selected: list[ProcessedNews] = []
    seen_cats: set[str] = set()

-    # First pass: one guaranteed per category
+    # First pass: at most one per category — a category with no candidate scoring >= 85 gets none
    for cat in categories:
-        for n in all_news:
+        for n in candidates:
            if n.category == cat and cat not in seen_cats and n not in selected:
                selected.append(n)
                seen_cats.add(cat)
                break

-    # Second pass: fill up to 10 by score
-    for n in all_news:
+    # Second pass: fill up to 10 by score (still from candidates only)
+    for n in candidates:
        if len(selected) >= 10:
            break
        if n not in selected:
@@ -141,6 +147,7 @@ async def run_daily_pipeline(db: AsyncSession):
                title=item["title"],
                url=item["url"],
                raw_content=item["content"],
+                image_url=item.get("image_url"),
                published_at=item["published_at"],
            ))
            raw_added += 1
@@ -170,11 +177,12 @@ async def run_daily_pipeline(db: AsyncSession):
                summary=analysis.get("summary", ""),
                opinion=analysis.get("opinion"),
                keywords=analysis.get("keywords", []),
-                importance_score=float(analysis.get("importance_score", 5.0)),
+                importance_score=float(analysis.get("importance_score", 50.0)),
                importance_reason=analysis.get("importance_reason"),
                category=analysis.get("category", "行业动态"),
                source_name=raw.source.name if raw.source else "",
                source_url=raw.url,
+                image_url=raw.image_url,
                published_at=raw.published_at,
            ))
            raw.status = "processed"