v1.0定版

This commit is contained in:
2026-05-27 17:14:08 +08:00
parent 1b7210de4f
commit 5b19d9fe69
32 changed files with 2074 additions and 2915 deletions

View File

@@ -25,18 +25,20 @@ ANALYSIS_PROMPT = """分析以下新闻,返回严格的 JSON 格式结果,
"summary": "中文摘要100-150字客观陈述核心内容",
"opinion": "核心观点或行业影响50-100字分析性语言点明实际意义",
"keywords": ["关键词1", "关键词2", "关键词3", "关键词4", "关键词5"],
"importance_score": 8.5,
"importance_score": 85,
"importance_reason": "评分理由30字内",
"category": "药品监管"
}}
category 只能是以下四个之一:药品监管 / 临床研究 / 行业动态 / 政策法规
importance_score 评分标准1-10
9-10重大监管决定 / 突破性研究 / 影响整个行业的政策
7-8 :行业重要动态,有明显商业或学术价值
5-6 :常规行业新闻,有一定参考价值
1-4 :普通资讯,信息价值有限
importance_score 评分标准1-100整数
90-100:重大监管决定 / 突破性研究 / 影响整个行业的政策
70-89 :行业重要动态,有明显商业或学术价值
50-69 :常规行业新闻,有一定参考价值
1-49 :普通资讯,信息价值有限
注意:只有 85 分及以上的新闻才有资格进入每日精选,请严格区分。
"""
@@ -70,7 +72,8 @@ async def _analyze_article(client: LLMClient, title: str, content: str, language
async def _select_top_10(db: AsyncSession, target: date):
"""Reset featured flags and elect TOP 10 with category diversity."""
"""Reset featured flags and elect TOP 10 with category diversity.
Only news with importance_score >= 85 is eligible for 精选."""
result = await db.execute(
select(ProcessedNews)
.where(func.date(ProcessedNews.processed_at) == target)
@@ -78,25 +81,28 @@ async def _select_top_10(db: AsyncSession, target: date):
)
all_news = result.scalars().all()
# Reset
# Reset all
for n in all_news:
n.is_featured = False
n.featured_rank = None
# Only candidates with score >= 85
candidates = [n for n in all_news if n.importance_score >= 85]
categories = ["药品监管", "临床研究", "行业动态", "政策法规"]
selected: list[ProcessedNews] = []
seen_cats: set[str] = set()
# First pass: one guaranteed per category
# First pass: one guaranteed per category (from high-score candidates)
for cat in categories:
for n in all_news:
for n in candidates:
if n.category == cat and cat not in seen_cats and n not in selected:
selected.append(n)
seen_cats.add(cat)
break
# Second pass: fill up to 10 by score
for n in all_news:
# Second pass: fill up to 10 by score (still from candidates only)
for n in candidates:
if len(selected) >= 10:
break
if n not in selected:
@@ -141,6 +147,7 @@ async def run_daily_pipeline(db: AsyncSession):
title=item["title"],
url=item["url"],
raw_content=item["content"],
image_url=item.get("image_url"),
published_at=item["published_at"],
))
raw_added += 1
@@ -170,11 +177,12 @@ async def run_daily_pipeline(db: AsyncSession):
summary=analysis.get("summary", ""),
opinion=analysis.get("opinion"),
keywords=analysis.get("keywords", []),
importance_score=float(analysis.get("importance_score", 5.0)),
importance_score=float(analysis.get("importance_score", 50.0)),
importance_reason=analysis.get("importance_reason"),
category=analysis.get("category", "行业动态"),
source_name=raw.source.name if raw.source else "",
source_url=raw.url,
image_url=raw.image_url,
published_at=raw.published_at,
))
raw.status = "processed"