v1.0定版
This commit is contained in:
@@ -25,18 +25,20 @@ ANALYSIS_PROMPT = """分析以下新闻,返回严格的 JSON 格式结果,
|
||||
"summary": "中文摘要(100-150字,客观陈述核心内容)",
|
||||
"opinion": "核心观点或行业影响(50-100字,分析性语言,点明实际意义)",
|
||||
"keywords": ["关键词1", "关键词2", "关键词3", "关键词4", "关键词5"],
|
||||
"importance_score": 8.5,
|
||||
"importance_score": 85,
|
||||
"importance_reason": "评分理由(30字内)",
|
||||
"category": "药品监管"
|
||||
}}
|
||||
|
||||
category 只能是以下四个之一:药品监管 / 临床研究 / 行业动态 / 政策法规
|
||||
|
||||
importance_score 评分标准(1-10):
|
||||
9-10:重大监管决定 / 突破性研究 / 影响整个行业的政策
|
||||
7-8 :行业重要动态,有明显商业或学术价值
|
||||
5-6 :常规行业新闻,有一定参考价值
|
||||
1-4 :普通资讯,信息价值有限
|
||||
importance_score 评分标准(1-100整数):
|
||||
90-100:重大监管决定 / 突破性研究 / 影响整个行业的政策
|
||||
70-89 :行业重要动态,有明显商业或学术价值
|
||||
50-69 :常规行业新闻,有一定参考价值
|
||||
1-49 :普通资讯,信息价值有限
|
||||
|
||||
注意:只有 85 分及以上的新闻才有资格进入每日精选,请严格区分。
|
||||
"""
|
||||
|
||||
|
||||
@@ -70,7 +72,8 @@ async def _analyze_article(client: LLMClient, title: str, content: str, language
|
||||
|
||||
|
||||
async def _select_top_10(db: AsyncSession, target: date):
|
||||
"""Reset featured flags and elect TOP 10 with category diversity."""
|
||||
"""Reset featured flags and elect TOP 10 with category diversity.
|
||||
Only news with importance_score >= 85 is eligible for 精选."""
|
||||
result = await db.execute(
|
||||
select(ProcessedNews)
|
||||
.where(func.date(ProcessedNews.processed_at) == target)
|
||||
@@ -78,25 +81,28 @@ async def _select_top_10(db: AsyncSession, target: date):
|
||||
)
|
||||
all_news = result.scalars().all()
|
||||
|
||||
# Reset
|
||||
# Reset all
|
||||
for n in all_news:
|
||||
n.is_featured = False
|
||||
n.featured_rank = None
|
||||
|
||||
# Only candidates with score >= 85
|
||||
candidates = [n for n in all_news if n.importance_score >= 85]
|
||||
|
||||
categories = ["药品监管", "临床研究", "行业动态", "政策法规"]
|
||||
selected: list[ProcessedNews] = []
|
||||
seen_cats: set[str] = set()
|
||||
|
||||
# First pass: one guaranteed per category
|
||||
# First pass: one guaranteed per category (from high-score candidates)
|
||||
for cat in categories:
|
||||
for n in all_news:
|
||||
for n in candidates:
|
||||
if n.category == cat and cat not in seen_cats and n not in selected:
|
||||
selected.append(n)
|
||||
seen_cats.add(cat)
|
||||
break
|
||||
|
||||
# Second pass: fill up to 10 by score
|
||||
for n in all_news:
|
||||
# Second pass: fill up to 10 by score (still from candidates only)
|
||||
for n in candidates:
|
||||
if len(selected) >= 10:
|
||||
break
|
||||
if n not in selected:
|
||||
@@ -141,6 +147,7 @@ async def run_daily_pipeline(db: AsyncSession):
|
||||
title=item["title"],
|
||||
url=item["url"],
|
||||
raw_content=item["content"],
|
||||
image_url=item.get("image_url"),
|
||||
published_at=item["published_at"],
|
||||
))
|
||||
raw_added += 1
|
||||
@@ -170,11 +177,12 @@ async def run_daily_pipeline(db: AsyncSession):
|
||||
summary=analysis.get("summary", ""),
|
||||
opinion=analysis.get("opinion"),
|
||||
keywords=analysis.get("keywords", []),
|
||||
importance_score=float(analysis.get("importance_score", 5.0)),
|
||||
importance_score=float(analysis.get("importance_score", 50.0)),
|
||||
importance_reason=analysis.get("importance_reason"),
|
||||
category=analysis.get("category", "行业动态"),
|
||||
source_name=raw.source.name if raw.source else "",
|
||||
source_url=raw.url,
|
||||
image_url=raw.image_url,
|
||||
published_at=raw.published_at,
|
||||
))
|
||||
raw.status = "processed"
|
||||
|
||||
Reference in New Issue
Block a user