import logging
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from typing import Optional

import feedparser
import httpx

logger = logging.getLogger(__name__)

# HTTP headers sent with every feed request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; PharmaIntelBot/1.0)"
}


def _to_naive_utc(value: datetime) -> datetime:
    """Return *value* as a naive datetime expressed in UTC.

    Aware datetimes are converted to UTC before the tzinfo is removed, so
    timestamps coming from feeds in different time zones stay comparable.
    Naive datetimes carry no offset to convert from and are returned as-is.
    """
    if value.tzinfo is None:
        return value
    return value.astimezone(timezone.utc).replace(tzinfo=None)


def _parse_date(raw: str) -> Optional[datetime]:
    """Parse a feed timestamp into a naive UTC datetime.

    RFC 2822 style dates are tried first, then ISO 8601 (a trailing ``Z`` is
    rewritten to ``+00:00`` so ``datetime.fromisoformat`` accepts it).

    Args:
        raw: The timestamp string taken from a feed entry; may be empty.

    Returns:
        A naive datetime (normalised to UTC when the input carried an
        offset), or ``None`` when *raw* is empty or matches neither format.
    """
    if not raw:
        return None

    # Parsing is deliberately best-effort: an unparseable date must never
    # abort processing of the feed, so any failure falls through.
    try:
        return _to_naive_utc(parsedate_to_datetime(raw))
    except Exception:
        pass

    try:
        return _to_naive_utc(datetime.fromisoformat(raw.replace("Z", "+00:00")))
    except Exception:
        return None


def _entry_content(entry) -> str:
    """Return the stripped body text of a feed entry, or ``""`` if none.

    The entry's ``summary`` is preferred; otherwise the ``value`` of the first
    ``content`` element is used. A missing key, an empty ``content`` list and
    a ``None`` value are all treated as "no text" rather than raising.
    """
    summary = entry.get("summary")
    if summary:
        return summary.strip()

    blocks = entry.get("content") or []
    if not blocks:
        return ""
    return (blocks[0].get("value") or "").strip()


async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
    """Download an RSS/Atom feed and return its entries as plain dicts.

    Args:
        url: Address of the feed to download.
        max_items: Maximum number of feed entries to examine. Entries lacking
            a title or a link are skipped, so fewer items may be returned.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``content``
        (truncated to 3000 characters) and ``published_at`` (a naive datetime
        or ``None``). An empty list is returned when the download fails; the
        failure is logged as a warning instead of being raised.
    """
    try:
        async with httpx.AsyncClient(
            headers=HEADERS, timeout=30, follow_redirects=True
        ) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            text = resp.text
    except Exception as e:
        # Best-effort boundary: a failing source yields no items rather than
        # propagating the error to the caller.
        logger.warning("RSS fetch failed %s: %s", url, e)
        return []

    feed = feedparser.parse(text)
    items = []
    for entry in feed.entries[:max_items]:
        title = (entry.get("title") or "").strip()
        link = (entry.get("link") or "").strip()
        if not title or not link:
            continue

        published_raw = entry.get("published") or entry.get("updated") or ""
        items.append({
            "title": title,
            "url": link,
            "content": _entry_content(entry)[:3000],
            "published_at": _parse_date(published_raw),
        })

    logger.info("RSS %s: got %d items", url, len(items))
    return items


# Default news sources (can be added or removed on the admin page).
DEFAULT_SOURCES = [
    # Chinese-language sources
    {
        "name": "国家药监局",
        "url": "https://www.nmpa.gov.cn/rss/yaopinxinxi.xml",
        "language": "zh",
        "category": "药品监管",
    },
    {
        "name": "丁香园",
        "url": "https://www.dxy.cn/bbs/feed.xml",
        "language": "zh",
        "category": "临床研究",
    },
    {
        "name": "医学界",
        "url": "https://www.yxj.org.cn/rss.xml",
        "language": "zh",
        "category": "行业动态",
    },
    # English-language sources
    {
        "name": "STAT News",
        "url": "https://www.statnews.com/feed/",
        "language": "en",
        "category": "临床研究",
    },
    {
        "name": "FiercePharma",
        "url": "https://www.fiercepharma.com/rss/xml",
        "language": "en",
        "category": "行业动态",
    },
    {
        "name": "FDA News",
        "url": "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/fda-news-feed/rss.xml",
        "language": "en",
        "category": "药品监管",
    },
]