v1.0定版
This commit is contained in:
Binary file not shown.
@@ -25,6 +25,31 @@ def _parse_date(raw: str) -> Optional[datetime]:
|
||||
return None
|
||||
|
||||
|
||||
def _extract_image(entry) -> Optional[str]:
|
||||
"""Try to pull an image URL from common RSS media extensions."""
|
||||
# <media:thumbnail>
|
||||
thumbnails = getattr(entry, "media_thumbnail", [])
|
||||
if thumbnails:
|
||||
url = thumbnails[0].get("url", "").strip()
|
||||
if url:
|
||||
return url
|
||||
# <media:content medium="image">
|
||||
for mc in getattr(entry, "media_content", []):
|
||||
mc_type = mc.get("type", "")
|
||||
mc_medium = mc.get("medium", "")
|
||||
if mc_medium == "image" or mc_type.startswith("image/"):
|
||||
url = mc.get("url", "").strip()
|
||||
if url:
|
||||
return url
|
||||
# <enclosure type="image/...">
|
||||
for enc in getattr(entry, "enclosures", []):
|
||||
if enc.get("type", "").startswith("image/"):
|
||||
url = (enc.get("href") or enc.get("url") or "").strip()
|
||||
if url:
|
||||
return url
|
||||
return None
|
||||
|
||||
|
||||
async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
|
||||
try:
|
||||
async with httpx.AsyncClient(headers=HEADERS, timeout=30, follow_redirects=True) as client:
|
||||
@@ -54,6 +79,7 @@ async def fetch_rss(url: str, max_items: int = 30) -> list[dict]:
|
||||
"url": link,
|
||||
"content": content[:3000],
|
||||
"published_at": _parse_date(published_raw),
|
||||
"image_url": _extract_image(entry),
|
||||
})
|
||||
|
||||
logger.info(f"RSS {url}: got {len(items)} items")
|
||||
|
||||
Reference in New Issue
Block a user