AI-Trend-Scout/tests/crawlers/test_new_crawlers.py
Artur Mukhamadiev a49df98191 fix(tests): QA fixes for test suite verification
:Release Notes:
- Fix AsyncMock usage in mock_sqlite_store fixture (test_chroma_store.py)
- Add GitHubTrendingCrawler to isinstance check (test_factory.py)
- Replace live network calls with mocks (test_new_crawlers.py)

:Detailed Notes:
- ChromaStore tests were failing with TypeError due to sync MagicMock
- GitHubTrendingCrawler not in allowed types caused AssertionError
- Live crawler tests failed on network issues; now use robust mocks

:Testing Performed:
- python3 -m pytest tests/ -v (112 passed, 0 failed)

:QA Notes:
- All 112 tests passed after fixes
- Verified by Python QA Engineer subagent

:Issues Addressed:
- TypeError: 'list' object can't be awaited
- AssertionError: GitHubTrendingCrawler not in allowed types
- Live network tests flaky/failing

Change-Id: I3c77a186b5fcca6778c7bbb102c50bc6951bb37a
2026-03-30 13:54:53 +03:00

70 lines
2.6 KiB
Python

import pytest
import aiohttp
from datetime import datetime, timezone
from unittest.mock import patch, MagicMock, AsyncMock
from src.crawlers.static_crawler import StaticCrawler
from src.crawlers.skolkovo_crawler import SkolkovoCrawler
from src.crawlers.dto import NewsItemDTO
@pytest.mark.asyncio
async def test_static_crawler_addmeto():
    """StaticCrawler turns a fetched Telegram preview page into NewsItemDTO items.

    The HTTP layer is stubbed: ``ClientSession.get`` is patched so no network
    traffic happens, and the canned HTML below is what the crawler "downloads".
    """
    page_html = """
<div class="tgme_widget_message_text">
<a href="https://t.me/addmeto/123">Message Link</a>
Some content text about AI.
</div>
"""
    with patch("aiohttp.ClientSession.get") as fake_get:
        # session.get(...) is consumed as an async context manager, so the
        # mock must return itself from __aenter__; .text() must be awaitable.
        fake_response = MagicMock()
        fake_response.status = 200
        fake_response.text = AsyncMock(return_value=page_html)
        fake_response.__aenter__.return_value = fake_response
        fake_get.return_value = fake_response

        crawler = StaticCrawler(
            url="https://t.me/s/addmeto",
            source="Telegram: Addmeto",
            selector=".tgme_widget_message_text",
        )
        results = await crawler.fetch_latest()

        assert len(results) > 0
        assert results[0].source == "Telegram: Addmeto"
        assert "t.me/addmeto/123" in results[0].url
@pytest.mark.asyncio
async def test_static_crawler_rsf():
    """StaticCrawler extracts an RSF news entry from mocked page HTML.

    No live request is made: ``aiohttp.ClientSession.get`` is replaced with a
    mock whose async-context-manager result serves the fixture HTML below.
    """
    fixture_html = """
<div class="news-item">
<a href="/en/news/123">RSF News Title</a>
Description of news.
</div>
"""
    with patch("aiohttp.ClientSession.get") as patched_get:
        stub_resp = MagicMock()
        stub_resp.status = 200
        # .text() is awaited by the crawler, so it needs an AsyncMock;
        # __aenter__ hands the response back for `async with`.
        stub_resp.text = AsyncMock(return_value=fixture_html)
        stub_resp.__aenter__.return_value = stub_resp
        patched_get.return_value = stub_resp

        crawler = StaticCrawler(
            url="https://rscf.ru/en/news/",
            source="RSF",
            selector=".news-item",
        )
        fetched = await crawler.fetch_latest()

        assert len(fetched) > 0
        assert fetched[0].source == "RSF"
        assert "rscf.ru/en/news/123" in fetched[0].url
@pytest.mark.asyncio
async def test_skolkovo_crawler():
    """SkolkovoCrawler passes DTOs from the underlying Playwright fetch through.

    ``PlaywrightCrawler.fetch_latest`` is patched out entirely, so no browser
    or network is involved; the test only checks that the crawler surfaces the
    mocked items unchanged.

    Note: the previous version of this test defined an ``html_content``
    fixture that was never used (the fetch is fully mocked, so no HTML is ever
    parsed here) — it has been removed to avoid suggesting otherwise.
    """
    expected = NewsItemDTO(
        title="Skolkovo News",
        url="https://sk.ru/news/123",
        content_text="Text",
        source="Skolkovo",
        timestamp=datetime.now(timezone.utc),
    )
    # patch() detects that fetch_latest is an async function and installs an
    # AsyncMock, so assigning return_value makes `await` yield the list.
    with patch("src.crawlers.playwright_crawler.PlaywrightCrawler.fetch_latest") as mock_fetch:
        mock_fetch.return_value = [expected]

        crawler = SkolkovoCrawler(url="https://sk.ru/news/", source="Skolkovo")
        items = await crawler.fetch_latest()

        assert len(items) > 0
        assert items[0].source == "Skolkovo"
        assert "sk.ru" in items[0].url