import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from datetime import datetime, timezone
from src.crawlers.scirate_crawler import SciRateCrawler
from src.crawlers.dto import NewsItemDTO
# Canned page text for SciRate crawler tests: two entries, each apparently an
# author line followed by an abstract line. The tests visible in this part of
# the file do not reference it.
MOCK_SCIRATE_HTML = (
    "\n"
    "Vaswani et al.\n"
    "This paper presents a new architecture...\n"
    "Doe and Smith\n"
    "Abstract of another paper.\n"
)
@pytest.mark.asyncio
async def test_scirate_crawler_fetch_latest():
    """Check the URLs produced by fetch_latest() for list and string hrefs.

    The HTTP layer is replaced by a mocked 200 response, and
    ``bs4.element.Tag.get`` is patched to return a one-element list first and
    a plain string second. The test expects exactly two items whose URLs are
    those paths appended to ``https://scirate.com``.
    """
    crawler = SciRateCrawler("https://scirate.com/", "SciRate")

    # Page body served by the mocked response.
    # NOTE(review): the original comment describes this as HTML with multiple
    # items (one missing a title, one with a list-like link) -- confirm the
    # fixture text still matches that description.
    page_body = "\nNo link here\n"

    fake_response = AsyncMock(status=200)
    fake_response.text.return_value = page_body

    # BeautifulSoup does not normally return a list for an href, so Tag.get
    # is patched to do so; this is meant to reach the crawler's
    # 'isinstance(link, list)' handling. Values are consumed one per call.
    href_results = [["/arxiv/list"], "/arxiv/3"]

    with patch("aiohttp.ClientSession.get") as mock_get:
        # The crawler is expected to use the request as an async context
        # manager, so the response is wired in through __aenter__.
        mock_get.return_value.__aenter__.return_value = fake_response
        with patch("bs4.element.Tag.get", side_effect=href_results):
            items = await crawler.fetch_latest()

    assert len(items) == 2
    first_item, second_item = items
    assert first_item.url == "https://scirate.com/arxiv/list"
    assert second_item.url == "https://scirate.com/arxiv/3"
@pytest.mark.asyncio
async def test_scirate_crawler_exception():
    """Expect fetch_latest() to return an empty list when parse_html raises.

    The HTTP call is mocked to succeed (status 200, empty body) so that the
    only failure comes from the patched ``SciRateCrawler.parse_html``.
    """
    crawler = SciRateCrawler()

    ok_response = AsyncMock(status=200)
    ok_response.text.return_value = ""

    parse_failure = Exception("Parsing failed")

    with patch("aiohttp.ClientSession.get") as mock_get:
        mock_get.return_value.__aenter__.return_value = ok_response
        # Make every call to parse_html raise.
        with patch.object(SciRateCrawler, "parse_html", side_effect=parse_failure):
            items = await crawler.fetch_latest()

    assert items == []
@pytest.mark.asyncio
async def test_scirate_crawler_error():
    """Expect fetch_latest() to return an empty list on an HTTP 500 reply."""
    crawler = SciRateCrawler()

    # Only the status is configured; the test does not set a response body.
    server_error = AsyncMock(status=500)

    with patch("aiohttp.ClientSession.get") as mock_get:
        mock_get.return_value.__aenter__.return_value = server_error
        items = await crawler.fetch_latest()

    assert items == []