v1.0定版

This commit is contained in:
2026-05-27 17:14:08 +08:00
parent 1b7210de4f
commit 5b19d9fe69
32 changed files with 2074 additions and 2915 deletions

View File

@@ -25,6 +25,31 @@ def _parse_date(raw: str) -> Optional[datetime]:
return None
def _extract_image(entry) -> Optional[str]:
    """Return the first usable image URL found on an RSS entry, or None.

    Sources are checked in priority order:

    1. ``media_thumbnail`` items (``<media:thumbnail>``)
    2. ``media_content`` items whose ``medium`` is ``image`` or whose
       ``type`` starts with ``image/`` (``<media:content>``)
    3. ``enclosures`` items whose ``type`` starts with ``image/``
       (``<enclosure>``), reading ``href`` first and then ``url``

    Args:
        entry: Object that may expose ``media_thumbnail``, ``media_content``
            and ``enclosures`` attributes, each an iterable of dict-like
            items. Presumably a feedparser entry -- confirm against caller.

    Returns:
        The whitespace-stripped URL of the first match, or ``None`` when no
        source yields a non-empty URL.
    """
    # `or []` / `or ""` guard against attributes and keys that are present
    # but set to None, which a plain default argument would not catch.

    # <media:thumbnail> -- try every thumbnail, not only the first one.
    for thumb in getattr(entry, "media_thumbnail", None) or []:
        url = (thumb.get("url") or "").strip()
        if url:
            return url

    # <media:content medium="image">
    for mc in getattr(entry, "media_content", None) or []:
        # MIME types are case-insensitive, so normalise before comparing.
        mc_type = (mc.get("type") or "").lower()
        mc_medium = (mc.get("medium") or "").lower()
        if mc_medium == "image" or mc_type.startswith("image/"):
            url = (mc.get("url") or "").strip()
            if url:
                return url

    # <enclosure type="image/...">
    for enc in getattr(entry, "enclosures", None) or []:
        if (enc.get("type") or "").lower().startswith("image/"):
            url = (enc.get("href") or enc.get("url") or "").strip()
            if url:
                return url

    return None
async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
try:
async with httpx.AsyncClient(headers=HEADERS, timeout=30, follow_redirects=True) as client:
@@ -54,6 +79,7 @@ async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
"url": link,
"content": content[:3000],
"published_at": _parse_date(published_raw),
"image_url": _extract_image(entry),
})
logger.info(f"RSS {url}: got {len(items)} items")