Release Notes:
- Fix AsyncMock usage in mock_sqlite_store fixture (test_chroma_store.py)
- Add GitHubTrendingCrawler to isinstance check (test_factory.py)
- Replace live network calls with mocks (test_new_crawlers.py)

Detailed Notes:
- ChromaStore tests were failing with TypeError due to sync MagicMock
- GitHubTrendingCrawler not in allowed types caused AssertionError
- Live crawler tests failed on network issues; now use robust mocks

Testing Performed:
- python3 -m pytest tests/ -v (112 passed, 0 failed)

QA Notes:
- All 112 tests passed after fixes
- Verified by Python QA Engineer subagent

Issues Addressed:
- TypeError: 'list' object can't be awaited
- AssertionError: GitHubTrendingCrawler not in allowed types
- Live network tests flaky/failing

Change-Id: I3c77a186b5fcca6778c7bbb102c50bc6951bb37a
70 lines
2.6 KiB
Python
import pytest
|
|
import aiohttp
|
|
from datetime import datetime, timezone
|
|
from unittest.mock import patch, MagicMock, AsyncMock
|
|
from src.crawlers.static_crawler import StaticCrawler
|
|
from src.crawlers.skolkovo_crawler import SkolkovoCrawler
|
|
from src.crawlers.dto import NewsItemDTO
|
|
|
|
@pytest.mark.asyncio
async def test_static_crawler_addmeto():
    """StaticCrawler turns Telegram widget markup into news items.

    The HTTP layer is fully mocked: ``aiohttp.ClientSession.get`` is patched
    to hand back a canned response, so no network traffic happens.
    """
    telegram_html = """
<div class="tgme_widget_message_text">
<a href="https://t.me/addmeto/123">Message Link</a>
Some content text about AI.
</div>
"""
    with patch("aiohttp.ClientSession.get") as patched_get:
        # Response double usable via ``async with``: MagicMock auto-configures
        # __aenter__/__aexit__ as AsyncMocks (Python 3.8+), so only the entry
        # value and the awaitable ``text()`` need explicit wiring.
        fake_resp = MagicMock()
        fake_resp.status = 200
        fake_resp.text = AsyncMock(return_value=telegram_html)
        fake_resp.__aenter__.return_value = fake_resp
        patched_get.return_value = fake_resp

        crawler = StaticCrawler(
            url="https://t.me/s/addmeto",
            source="Telegram: Addmeto",
            selector=".tgme_widget_message_text",
        )
        items = await crawler.fetch_latest()

        # At least one item must be extracted, attributed to the right
        # source, and carry the message permalink from the markup.
        assert len(items) > 0
        assert items[0].source == "Telegram: Addmeto"
        assert "t.me/addmeto/123" in items[0].url
|
|
|
|
@pytest.mark.asyncio
async def test_static_crawler_rsf():
    """StaticCrawler extracts RSF news entries and absolutizes relative links.

    ``aiohttp.ClientSession.get`` is patched so the test is hermetic; the
    fixture markup uses a site-relative href, and the resulting item URL is
    expected to be resolved against the crawler's base URL.
    """
    rsf_html = """
<div class="news-item">
<a href="/en/news/123">RSF News Title</a>
Description of news.
</div>
"""
    with patch("aiohttp.ClientSession.get") as patched_get:
        # Async-context-capable response stub; MagicMock supplies async
        # __aenter__/__aexit__ automatically, so only the payload is wired up.
        fake_resp = MagicMock()
        fake_resp.status = 200
        fake_resp.text = AsyncMock(return_value=rsf_html)
        fake_resp.__aenter__.return_value = fake_resp
        patched_get.return_value = fake_resp

        crawler = StaticCrawler(
            url="https://rscf.ru/en/news/",
            source="RSF",
            selector=".news-item",
        )
        items = await crawler.fetch_latest()

        assert len(items) > 0
        assert items[0].source == "RSF"
        # The relative "/en/news/123" href must come back as an absolute URL.
        assert "rscf.ru/en/news/123" in items[0].url
|
|
|
|
@pytest.mark.asyncio
async def test_skolkovo_crawler():
    """SkolkovoCrawler surfaces items produced by its Playwright base class.

    ``PlaywrightCrawler.fetch_latest`` is patched directly (``patch`` creates
    an AsyncMock automatically for async defs), so no browser or network is
    involved.

    Fix: removed the dead ``html_content`` fixture string — it was never
    used because this test mocks ``fetch_latest`` itself rather than the
    page-fetching layer.
    """
    with patch(
        "src.crawlers.playwright_crawler.PlaywrightCrawler.fetch_latest"
    ) as mock_fetch:
        mock_fetch.return_value = [
            NewsItemDTO(
                title="Skolkovo News",
                url="https://sk.ru/news/123",
                content_text="Text",
                source="Skolkovo",
                timestamp=datetime.now(timezone.utc),
            )
        ]

        crawler = SkolkovoCrawler(url="https://sk.ru/news/", source="Skolkovo")
        items = await crawler.fetch_latest()

        # The mocked base-class result should pass through unchanged.
        assert len(items) > 0
        assert items[0].source == "Skolkovo"
        assert "sk.ru" in items[0].url
|
|
|