AI-Trend-Scout/tests/crawlers/test_new_crawlers.py
Artur Mukhamadiev 217037f72e feat(crawlers): convert multiple sources from Playwright to Static/RSS
- Added `StaticCrawler` for generic aiohttp+BS4 parsing.
- Added `SkolkovoCrawler` for specialized Next.js parsing of sk.ru.
- Converted ICRA 2025, RSF, CES 2025, and Telegram Addmeto to `static`.
- Converted Horizon Europe to `rss` using its native feed.
- Updated `CrawlerFactory` to support new crawler types.
- Validated changes with unit tests.
2026-03-15 21:21:14 +03:00

28 lines
1.0 KiB
Python

import pytest
import aiohttp
from src.crawlers.static_crawler import StaticCrawler
from src.crawlers.skolkovo_crawler import SkolkovoCrawler
from src.crawlers.dto import NewsItemDTO
@pytest.mark.asyncio
async def test_static_crawler_addmeto():
    """Run StaticCrawler against https://t.me/s/addmeto and check the result.

    The test passes when ``fetch_latest`` yields at least one item and the
    first item's ``source`` equals the label given to the crawler.

    NOTE(review): nothing in this test stubs the HTTP layer, so it appears to
    perform a live fetch -- confirm whether that is intended for CI.
    """
    expected_source = "Telegram: Addmeto"
    crawler = StaticCrawler(
        url="https://t.me/s/addmeto",
        source=expected_source,
        selector=".tgme_widget_message_text",
    )

    fetched = await crawler.fetch_latest()

    # Check the count before indexing so an empty result fails as an
    # assertion rather than an IndexError.
    assert len(fetched) > 0
    first = fetched[0]
    assert first.source == expected_source
@pytest.mark.asyncio
async def test_static_crawler_rsf():
    """Run StaticCrawler against https://rscf.ru/en/news/ and check the result.

    The test passes when ``fetch_latest`` yields at least one item, the first
    item's ``source`` is ``"RSF"``, and its ``url`` contains ``"rscf.ru"``.

    NOTE(review): nothing in this test stubs the HTTP layer, so it appears to
    perform a live fetch -- confirm whether that is intended for CI.
    """
    expected_source = "RSF"
    crawler = StaticCrawler(
        url="https://rscf.ru/en/news/",
        source=expected_source,
        selector=".news-item",
    )

    fetched = await crawler.fetch_latest()

    # Check the count before indexing so an empty result fails as an
    # assertion rather than an IndexError.
    assert len(fetched) > 0
    first = fetched[0]
    assert first.source == expected_source
    # The item link is expected to stay on the crawled site's domain.
    assert "rscf.ru" in first.url
@pytest.mark.asyncio
async def test_skolkovo_crawler():
    """Run SkolkovoCrawler against https://sk.ru/news/ and check the result.

    The test passes when ``fetch_latest`` yields at least one item, the first
    item's ``source`` is ``"Skolkovo"``, and its ``url`` contains ``"sk.ru"``.

    NOTE(review): nothing in this test stubs the HTTP layer, so it appears to
    perform a live fetch -- confirm whether that is intended for CI.
    """
    expected_source = "Skolkovo"
    crawler = SkolkovoCrawler(url="https://sk.ru/news/", source=expected_source)

    fetched = await crawler.fetch_latest()

    # Check the count before indexing so an empty result fails as an
    # assertion rather than an IndexError.
    assert len(fetched) > 0
    first = fetched[0]
    assert first.source == expected_source
    # The item link is expected to stay on the crawled site's domain.
    assert "sk.ru" in first.url